Last active
May 17, 2023 16:31
-
-
Save ad1happy2go/b989c9a8667f1ac555dd2030ea686f41 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Stage the Avro schema and the JSON source data from S3 into HDFS /tmp;
# the deltastreamer configs below read both from there
# (fs.defaultFS is HDFS on this cluster, so /tmp here is hdfs:///tmp).
hadoop fs -cp s3://rxusandbox-us-west-2/testcases/stocks/data/schema.avsc /tmp/
hadoop fs -cp s3://rxusandbox-us-west-2/testcases/stocks/data/source /tmp/source_parquet

# Unique timestamp suffix so every run writes to a fresh table and base path.
NOW=$(date '+%Y%m%dt%H%M%S')

# Run HoodieDeltaStreamer from the *slim* utilities bundle. The slim bundle
# does not embed the Spark/Hive-sync code, so those bundles must be supplied
# explicitly via --jars.
# NOTE(review): --master local conflicts with --spark-master yarn below —
# confirm which deploy mode is actually intended for this repro.
bin/spark-submit --master local \
  --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer \
  --jars /home/hadoop/v_3.1/hudi-hive-sync-bundle-0.13.0.jar,/home/hadoop/v_3.1/hudi-spark3.1-bundle_2.12-0.13.0.jar \
  /home/hadoop/v_3.1/hudi-utilities-slim-bundle_2.12-0.13.0.jar \
  --target-base-path "/tmp/deltastreamertest/stocks${NOW}" \
  --target-table "stocks${NOW}" --table-type COPY_ON_WRITE --base-file-format PARQUET \
  --source-class org.apache.hudi.utilities.sources.JsonDFSSource \
  --source-ordering-field ts --payload-class org.apache.hudi.common.model.DefaultHoodieRecordPayload \
  --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider \
  --hoodie-conf hoodie.deltastreamer.schemaprovider.source.schema.file=/tmp/schema.avsc \
  --hoodie-conf hoodie.deltastreamer.schemaprovider.target.schema.file=/tmp/schema.avsc \
  --op UPSERT --enable-sync --spark-master yarn \
  --hoodie-conf hoodie.deltastreamer.source.dfs.root=/tmp/source_parquet \
  --hoodie-conf hoodie.datasource.write.recordkey.field=symbol \
  --hoodie-conf hoodie.datasource.write.partitionpath.field=date \
  --hoodie-conf hoodie.datasource.write.precombine.field=ts \
  --hoodie-conf hoodie.datasource.write.keygenerator.type=SIMPLE \
  --hoodie-conf hoodie.datasource.write.hive_style_partitioning=false \
  --hoodie-conf hoodie.metadata.enable=true \
  --hoodie-conf hoodie.datasource.hive_sync.mode=hms \
  --hoodie-conf hoodie.datasource.hive_sync.skip_ro_suffix=true \
  --hoodie-conf hoodie.datasource.hive_sync.ignore_exceptions=false \
  --hoodie-conf hoodie.datasource.hive_sync.auto_create_database=true \
  --hoodie-conf hoodie.datasource.hive_sync.database=default \
  --hoodie-conf hoodie.datasource.hive_sync.partition_fields=date \
  --hoodie-conf hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor \
  --hoodie-conf hoodie.datasource.hive_sync.sync_as_datasource=true \
  --hoodie-conf hoodie.datasource.hive_sync.sync_comment=true
# Second run: identical pipeline, but using the *full* utilities bundle
# (hudi-utilities-bundle), which already embeds the Spark and Hive-sync
# code, so no extra --jars are needed.
NOW=$(date '+%Y%m%dt%H%M%S')

# NOTE(review): --master local conflicts with --spark-master yarn below —
# confirm which deploy mode is actually intended for this repro.
bin/spark-submit --master local \
  --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer \
  /home/hadoop/v_3.1/hudi-utilities-bundle_2.12-0.13.0.jar \
  --target-base-path "/tmp/deltastreamertest/stocks${NOW}" \
  --target-table "stocks${NOW}" --table-type COPY_ON_WRITE --base-file-format PARQUET \
  --source-class org.apache.hudi.utilities.sources.JsonDFSSource \
  --source-ordering-field ts --payload-class org.apache.hudi.common.model.DefaultHoodieRecordPayload \
  --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider \
  --hoodie-conf hoodie.deltastreamer.schemaprovider.source.schema.file=/tmp/schema.avsc \
  --hoodie-conf hoodie.deltastreamer.schemaprovider.target.schema.file=/tmp/schema.avsc \
  --op UPSERT --enable-sync --spark-master yarn \
  --hoodie-conf hoodie.deltastreamer.source.dfs.root=/tmp/source_parquet \
  --hoodie-conf hoodie.datasource.write.recordkey.field=symbol \
  --hoodie-conf hoodie.datasource.write.partitionpath.field=date \
  --hoodie-conf hoodie.datasource.write.precombine.field=ts \
  --hoodie-conf hoodie.datasource.write.keygenerator.type=SIMPLE \
  --hoodie-conf hoodie.datasource.write.hive_style_partitioning=false \
  --hoodie-conf hoodie.metadata.enable=true \
  --hoodie-conf hoodie.datasource.hive_sync.mode=hms \
  --hoodie-conf hoodie.datasource.hive_sync.skip_ro_suffix=true \
  --hoodie-conf hoodie.datasource.hive_sync.ignore_exceptions=false \
  --hoodie-conf hoodie.datasource.hive_sync.auto_create_database=true \
  --hoodie-conf hoodie.datasource.hive_sync.database=default \
  --hoodie-conf hoodie.datasource.hive_sync.partition_fields=date \
  --hoodie-conf hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor \
  --hoodie-conf hoodie.datasource.hive_sync.sync_as_datasource=true \
  --hoodie-conf hoodie.datasource.hive_sync.sync_comment=true
================ LOGS ================
SLF4J: Class path contains multiple SLF4J bindings. | |
SLF4J: Found binding in [jar:file:/home/hadoop/spark-3.1.3-bin-hadoop3.2/jars/slf4j-log4j12-1.7.30.jar!/org/slf4j/impl/StaticLoggerBinder.class] | |
SLF4J: Found binding in [jar:file:/usr/lib/hadoop/lib/slf4j-reload4j-1.7.36.jar!/org/slf4j/impl/StaticLoggerBinder.class] | |
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation. | |
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory] | |
2023-05-17 16:27:49,985 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable | |
2023-05-17 16:27:50,431 WARN deltastreamer.SchedulerConfGenerator: Job Scheduling Configs will not be in effect as spark.scheduler.mode is not set to FAIR at instantiation time. Continuing without scheduling configs | |
2023-05-17 16:27:50,478 INFO spark.SparkContext: Running Spark version 3.1.3 | |
2023-05-17 16:27:50,524 INFO resource.ResourceUtils: ============================================================== | |
2023-05-17 16:27:50,524 INFO resource.ResourceUtils: No custom resources configured for spark.driver. | |
2023-05-17 16:27:50,524 INFO resource.ResourceUtils: ============================================================== | |
2023-05-17 16:27:50,525 INFO spark.SparkContext: Submitted application: delta-streamer-stocks20230517t162744 | |
2023-05-17 16:27:50,547 INFO resource.ResourceProfile: Default ResourceProfile created, executor resources: Map(cores -> name: cores, amount: 1, script: , vendor: , memory -> name: memory, amount: 1024, script: , vendor: , offHeap -> name: offHeap, amount: 0, script: , vendor: ), task resources: Map(cpus -> name: cpus, amount: 1.0) | |
2023-05-17 16:27:50,560 INFO resource.ResourceProfile: Limiting resource is cpu | |
2023-05-17 16:27:50,561 INFO resource.ResourceProfileManager: Added ResourceProfile id: 0 | |
2023-05-17 16:27:50,608 INFO spark.SecurityManager: Changing view acls to: hadoop | |
2023-05-17 16:27:50,609 INFO spark.SecurityManager: Changing modify acls to: hadoop | |
2023-05-17 16:27:50,609 INFO spark.SecurityManager: Changing view acls groups to: | |
2023-05-17 16:27:50,609 INFO spark.SecurityManager: Changing modify acls groups to: | |
2023-05-17 16:27:50,609 INFO spark.SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(hadoop); groups with view permissions: Set(); users with modify permissions: Set(hadoop); groups with modify permissions: Set() | |
2023-05-17 16:27:50,624 INFO Configuration.deprecation: mapred.output.compression.codec is deprecated. Instead, use mapreduce.output.fileoutputformat.compress.codec | |
2023-05-17 16:27:50,624 INFO Configuration.deprecation: mapred.output.compress is deprecated. Instead, use mapreduce.output.fileoutputformat.compress | |
2023-05-17 16:27:50,624 INFO Configuration.deprecation: mapred.output.compression.type is deprecated. Instead, use mapreduce.output.fileoutputformat.compress.type | |
2023-05-17 16:27:50,798 INFO util.Utils: Successfully started service 'sparkDriver' on port 40569. | |
2023-05-17 16:27:50,838 INFO spark.SparkEnv: Registering MapOutputTracker | |
2023-05-17 16:27:50,872 INFO spark.SparkEnv: Registering BlockManagerMaster | |
2023-05-17 16:27:50,894 INFO storage.BlockManagerMasterEndpoint: Using org.apache.spark.storage.DefaultTopologyMapper for getting topology information | |
2023-05-17 16:27:50,894 INFO storage.BlockManagerMasterEndpoint: BlockManagerMasterEndpoint up | |
2023-05-17 16:27:50,898 INFO spark.SparkEnv: Registering BlockManagerMasterHeartbeat | |
2023-05-17 16:27:50,912 INFO storage.DiskBlockManager: Created local directory at /mnt/tmp/blockmgr-91bf89fe-e6c9-4ff2-8bb7-bebdc88cb343 | |
2023-05-17 16:27:50,934 INFO memory.MemoryStore: MemoryStore started with capacity 366.3 MiB | |
2023-05-17 16:27:50,950 INFO spark.SparkEnv: Registering OutputCommitCoordinator | |
2023-05-17 16:27:51,045 INFO util.log: Logging initialized @2465ms to org.sparkproject.jetty.util.log.Slf4jLog | |
2023-05-17 16:27:51,120 INFO server.Server: jetty-9.4.40.v20210413; built: 2021-04-13T20:42:42.668Z; git: b881a572662e1943a14ae12e7e1207989f218b74; jvm 1.8.0_372-b07 | |
2023-05-17 16:27:51,140 INFO server.Server: Started @2562ms | |
2023-05-17 16:27:51,178 INFO server.AbstractConnector: Started ServerConnector@303a5119{HTTP/1.1, (http/1.1)}{0.0.0.0:8090} | |
2023-05-17 16:27:51,178 INFO util.Utils: Successfully started service 'SparkUI' on port 8090. | |
2023-05-17 16:27:51,212 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@56ba8773{/jobs,null,AVAILABLE,@Spark} | |
2023-05-17 16:27:51,215 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@5cf8edcf{/jobs/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:27:51,215 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@629f066f{/jobs/job,null,AVAILABLE,@Spark} | |
2023-05-17 16:27:51,216 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@20ed3303{/jobs/job/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:27:51,217 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@3a627c80{/stages,null,AVAILABLE,@Spark} | |
2023-05-17 16:27:51,218 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@963176{/stages/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:27:51,218 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@4cafa9aa{/stages/stage,null,AVAILABLE,@Spark} | |
2023-05-17 16:27:51,219 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@31cb96e1{/stages/stage/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:27:51,220 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@64030b91{/stages/pool,null,AVAILABLE,@Spark} | |
2023-05-17 16:27:51,221 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@4d23015c{/stages/pool/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:27:51,222 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@441cc260{/storage,null,AVAILABLE,@Spark} | |
2023-05-17 16:27:51,223 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@26dcd8c0{/storage/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:27:51,224 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@444548a0{/storage/rdd,null,AVAILABLE,@Spark} | |
2023-05-17 16:27:51,224 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@773c0293{/storage/rdd/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:27:51,225 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@3b569985{/environment,null,AVAILABLE,@Spark} | |
2023-05-17 16:27:51,226 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@2dbd803f{/environment/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:27:51,227 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@31ddd4a4{/executors,null,AVAILABLE,@Spark} | |
2023-05-17 16:27:51,228 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@5b22b970{/executors/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:27:51,229 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@7df60067{/executors/threadDump,null,AVAILABLE,@Spark} | |
2023-05-17 16:27:51,229 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@529cfee5{/executors/threadDump/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:27:51,240 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@319854f0{/static,null,AVAILABLE,@Spark} | |
2023-05-17 16:27:51,241 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@77a281fc{/,null,AVAILABLE,@Spark} | |
2023-05-17 16:27:51,242 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@2bfbffb2{/api,null,AVAILABLE,@Spark} | |
2023-05-17 16:27:51,243 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@27fde870{/jobs/job/kill,null,AVAILABLE,@Spark} | |
2023-05-17 16:27:51,244 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@5ac7aa18{/stages/stage/kill,null,AVAILABLE,@Spark} | |
2023-05-17 16:27:51,286 INFO ui.SparkUI: Bound SparkUI to 0.0.0.0, and started at http://ip-172-31-19-77.us-east-2.compute.internal:8090 | |
2023-05-17 16:27:51,315 INFO spark.SparkContext: Added JAR file:///home/hadoop/v_3.1/hudi-hive-sync-bundle-0.13.0.jar at spark://ip-172-31-19-77.us-east-2.compute.internal:40569/jars/hudi-hive-sync-bundle-0.13.0.jar with timestamp 1684340870471 | |
2023-05-17 16:27:51,316 INFO spark.SparkContext: Added JAR file:///home/hadoop/v_3.1/hudi-spark3.1-bundle_2.12-0.13.0.jar at spark://ip-172-31-19-77.us-east-2.compute.internal:40569/jars/hudi-spark3.1-bundle_2.12-0.13.0.jar with timestamp 1684340870471 | |
2023-05-17 16:27:51,316 INFO spark.SparkContext: Added JAR file:/home/hadoop/v_3.1/hudi-utilities-slim-bundle_2.12-0.13.0.jar at spark://ip-172-31-19-77.us-east-2.compute.internal:40569/jars/hudi-utilities-slim-bundle_2.12-0.13.0.jar with timestamp 1684340870471 | |
2023-05-17 16:27:51,465 INFO executor.Executor: Starting executor ID driver on host ip-172-31-19-77.us-east-2.compute.internal | |
2023-05-17 16:27:51,482 INFO executor.Executor: Fetching spark://ip-172-31-19-77.us-east-2.compute.internal:40569/jars/hudi-utilities-slim-bundle_2.12-0.13.0.jar with timestamp 1684340870471 | |
2023-05-17 16:27:51,527 INFO client.TransportClientFactory: Successfully created connection to ip-172-31-19-77.us-east-2.compute.internal/172.31.19.77:40569 after 23 ms (0 ms spent in bootstraps) | |
2023-05-17 16:27:51,536 INFO util.Utils: Fetching spark://ip-172-31-19-77.us-east-2.compute.internal:40569/jars/hudi-utilities-slim-bundle_2.12-0.13.0.jar to /mnt/tmp/spark-a01000bf-741d-4cb7-9490-3b0c5ac456b2/userFiles-e56f7d71-05d6-4005-ada0-78aaa28c43a3/fetchFileTemp6459761610157986790.tmp | |
2023-05-17 16:27:51,692 INFO executor.Executor: Adding file:/mnt/tmp/spark-a01000bf-741d-4cb7-9490-3b0c5ac456b2/userFiles-e56f7d71-05d6-4005-ada0-78aaa28c43a3/hudi-utilities-slim-bundle_2.12-0.13.0.jar to class loader | |
2023-05-17 16:27:51,692 INFO executor.Executor: Fetching spark://ip-172-31-19-77.us-east-2.compute.internal:40569/jars/hudi-hive-sync-bundle-0.13.0.jar with timestamp 1684340870471 | |
2023-05-17 16:27:51,692 INFO util.Utils: Fetching spark://ip-172-31-19-77.us-east-2.compute.internal:40569/jars/hudi-hive-sync-bundle-0.13.0.jar to /mnt/tmp/spark-a01000bf-741d-4cb7-9490-3b0c5ac456b2/userFiles-e56f7d71-05d6-4005-ada0-78aaa28c43a3/fetchFileTemp5681242645375305844.tmp | |
2023-05-17 16:27:51,760 INFO executor.Executor: Adding file:/mnt/tmp/spark-a01000bf-741d-4cb7-9490-3b0c5ac456b2/userFiles-e56f7d71-05d6-4005-ada0-78aaa28c43a3/hudi-hive-sync-bundle-0.13.0.jar to class loader | |
2023-05-17 16:27:51,761 INFO executor.Executor: Fetching spark://ip-172-31-19-77.us-east-2.compute.internal:40569/jars/hudi-spark3.1-bundle_2.12-0.13.0.jar with timestamp 1684340870471 | |
2023-05-17 16:27:51,761 INFO util.Utils: Fetching spark://ip-172-31-19-77.us-east-2.compute.internal:40569/jars/hudi-spark3.1-bundle_2.12-0.13.0.jar to /mnt/tmp/spark-a01000bf-741d-4cb7-9490-3b0c5ac456b2/userFiles-e56f7d71-05d6-4005-ada0-78aaa28c43a3/fetchFileTemp3884407421316116342.tmp | |
2023-05-17 16:27:51,847 INFO executor.Executor: Adding file:/mnt/tmp/spark-a01000bf-741d-4cb7-9490-3b0c5ac456b2/userFiles-e56f7d71-05d6-4005-ada0-78aaa28c43a3/hudi-spark3.1-bundle_2.12-0.13.0.jar to class loader | |
2023-05-17 16:27:51,855 INFO util.Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 40491. | |
2023-05-17 16:27:51,856 INFO netty.NettyBlockTransferService: Server created on ip-172-31-19-77.us-east-2.compute.internal:40491 | |
2023-05-17 16:27:51,857 INFO storage.BlockManager: Using org.apache.spark.storage.RandomBlockReplicationPolicy for block replication policy | |
2023-05-17 16:27:51,865 INFO storage.BlockManagerMaster: Registering BlockManager BlockManagerId(driver, ip-172-31-19-77.us-east-2.compute.internal, 40491, None) | |
2023-05-17 16:27:51,868 INFO storage.BlockManagerMasterEndpoint: Registering block manager ip-172-31-19-77.us-east-2.compute.internal:40491 with 366.3 MiB RAM, BlockManagerId(driver, ip-172-31-19-77.us-east-2.compute.internal, 40491, None) | |
2023-05-17 16:27:51,870 INFO storage.BlockManagerMaster: Registered BlockManager BlockManagerId(driver, ip-172-31-19-77.us-east-2.compute.internal, 40491, None) | |
2023-05-17 16:27:51,871 INFO storage.BlockManager: Initialized BlockManager: BlockManagerId(driver, ip-172-31-19-77.us-east-2.compute.internal, 40491, None) | |
2023-05-17 16:27:52,024 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@57a2ed35{/metrics/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:27:52,427 WARN config.DFSPropertiesConfiguration: Cannot find HUDI_CONF_DIR, please set it as the dir of hudi-defaults.conf | |
2023-05-17 16:27:52,442 INFO utilities.UtilHelpers: Adding overridden properties to file properties. | |
2023-05-17 16:27:52,504 WARN spark.SparkContext: Using an existing SparkContext; some configuration may not take effect. | |
2023-05-17 16:27:52,647 INFO deltastreamer.HoodieDeltaStreamer: Creating delta streamer with configs: | |
hoodie.auto.adjust.lock.configs: true | |
hoodie.cleaner.policy.failed.writes: EAGER | |
hoodie.datasource.hive_sync.auto_create_database: true | |
hoodie.datasource.hive_sync.database: default | |
hoodie.datasource.hive_sync.ignore_exceptions: false | |
hoodie.datasource.hive_sync.jdbcurl: jdbc:hive2://ip-172-31-19-77.us-east-2.compute.internal:10000 | |
hoodie.datasource.hive_sync.mode: hms | |
hoodie.datasource.hive_sync.partition_extractor_class: org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor | |
hoodie.datasource.hive_sync.partition_fields: date | |
hoodie.datasource.hive_sync.skip_ro_suffix: true | |
hoodie.datasource.hive_sync.sync_as_datasource: true | |
hoodie.datasource.hive_sync.sync_comment: true | |
hoodie.datasource.write.hive_style_partitioning: false | |
hoodie.datasource.write.keygenerator.type: SIMPLE | |
hoodie.datasource.write.partitionpath.field: date | |
hoodie.datasource.write.precombine.field: ts | |
hoodie.datasource.write.reconcile.schema: false | |
hoodie.datasource.write.recordkey.field: symbol | |
hoodie.deltastreamer.schemaprovider.source.schema.file: /tmp/schema.avsc | |
hoodie.deltastreamer.schemaprovider.target.schema.file: /tmp/schema.avsc | |
hoodie.deltastreamer.source.dfs.root: /tmp/source_parquet | |
hoodie.index.type: BLOOM | |
hoodie.metadata.enable: true | |
hoodie.write.concurrency.mode: single_writer | |
hoodie.write.lock.provider: org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider | |
hoodie.write.lock.zookeeper.base_path: /hudi | |
hoodie.write.lock.zookeeper.port: 2181 | |
hoodie.write.lock.zookeeper.url: ip-172-31-19-77.us-east-2.compute.internal | |
2023-05-17 16:27:52,654 INFO fs.FSUtils: Resolving file /tmp/schema.avscto be a remote file. | |
2023-05-17 16:27:52,891 INFO factory.HoodieSparkKeyGeneratorFactory: The value of hoodie.datasource.write.keygenerator.type is empty, use SIMPLE | |
2023-05-17 16:27:52,907 INFO table.HoodieTableMetaClient: Initializing /tmp/deltastreamertest/stocks20230517t162744 as hoodie table /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:27:52,999 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:27:53,013 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:27:53,017 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:27:53,018 INFO table.HoodieTableMetaClient: Finished initializing Table of type COPY_ON_WRITE from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:27:53,221 INFO helpers.DFSPathSelector: Using path selector org.apache.hudi.utilities.sources.helpers.DFSPathSelector | |
2023-05-17 16:27:53,221 INFO deltastreamer.HoodieDeltaStreamer: Delta Streamer running only single round | |
2023-05-17 16:27:53,224 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:27:53,226 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:27:53,229 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:27:53,248 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
2023-05-17 16:27:53,252 INFO deltastreamer.DeltaSync: Checkpoint to resume from : Optional.empty | |
2023-05-17 16:27:53,258 INFO helpers.DFSPathSelector: Root path => /tmp/source_parquet source limit => 9223372036854775807 | |
2023-05-17 16:27:53,603 INFO memory.MemoryStore: Block broadcast_0 stored as values in memory (estimated size 418.0 KiB, free 365.9 MiB) | |
2023-05-17 16:27:53,963 INFO memory.MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 45.8 KiB, free 365.8 MiB) | |
2023-05-17 16:27:53,965 INFO storage.BlockManagerInfo: Added broadcast_0_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 45.8 KiB, free: 366.3 MiB) | |
2023-05-17 16:27:53,971 INFO spark.SparkContext: Created broadcast 0 from textFile at JsonDFSSource.java:54 | |
2023-05-17 16:27:54,094 ERROR lzo.GPLNativeCodeLoader: Could not load native gpl library | |
java.lang.UnsatisfiedLinkError: no gplcompression in java.library.path | |
at java.lang.ClassLoader.loadLibrary(ClassLoader.java:1860) | |
at java.lang.Runtime.loadLibrary0(Runtime.java:843) | |
at java.lang.System.loadLibrary(System.java:1136) | |
at com.hadoop.compression.lzo.GPLNativeCodeLoader.<clinit>(GPLNativeCodeLoader.java:32) | |
at com.hadoop.compression.lzo.LzoCodec.<clinit>(LzoCodec.java:71) | |
at java.lang.Class.forName0(Native Method) | |
at java.lang.Class.forName(Class.java:348) | |
at org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:2532) | |
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2497) | |
at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:132) | |
at org.apache.hadoop.io.compress.CompressionCodecFactory.<init>(CompressionCodecFactory.java:180) | |
at org.apache.hadoop.mapred.TextInputFormat.configure(TextInputFormat.java:45) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) | |
at java.lang.reflect.Method.invoke(Method.java:498) | |
at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:110) | |
at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:79) | |
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:137) | |
at org.apache.spark.rdd.HadoopRDD.getInputFormat(HadoopRDD.scala:191) | |
at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:205) | |
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:300) | |
at scala.Option.getOrElse(Option.scala:189) | |
at org.apache.spark.rdd.RDD.partitions(RDD.scala:296) | |
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) | |
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:300) | |
at scala.Option.getOrElse(Option.scala:189) | |
at org.apache.spark.rdd.RDD.partitions(RDD.scala:296) | |
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) | |
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:300) | |
at scala.Option.getOrElse(Option.scala:189) | |
at org.apache.spark.rdd.RDD.partitions(RDD.scala:296) | |
at org.apache.spark.rdd.RDD.$anonfun$isEmpty$1(RDD.scala:1557) | |
at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) | |
at org.apache.spark.rdd.RDD.withScope(RDD.scala:414) | |
at org.apache.spark.rdd.RDD.isEmpty(RDD.scala:1557) | |
at org.apache.spark.api.java.JavaRDDLike.isEmpty(JavaRDDLike.scala:545) | |
at org.apache.spark.api.java.JavaRDDLike.isEmpty$(JavaRDDLike.scala:545) | |
at org.apache.spark.api.java.AbstractJavaRDDLike.isEmpty(JavaRDDLike.scala:45) | |
at org.apache.hudi.utilities.deltastreamer.DeltaSync.fetchFromSource(DeltaSync.java:545) | |
at org.apache.hudi.utilities.deltastreamer.DeltaSync.readFromSource(DeltaSync.java:460) | |
at org.apache.hudi.utilities.deltastreamer.DeltaSync.syncOnce(DeltaSync.java:364) | |
at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.lambda$sync$2(HoodieDeltaStreamer.java:215) | |
at org.apache.hudi.common.util.Option.ifPresent(Option.java:97) | |
at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.sync(HoodieDeltaStreamer.java:213) | |
at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.main(HoodieDeltaStreamer.java:592) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) | |
at java.lang.reflect.Method.invoke(Method.java:498) | |
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) | |
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:951) | |
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180) | |
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203) | |
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90) | |
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1039) | |
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1048) | |
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) | |
2023-05-17 16:27:54,097 ERROR lzo.LzoCodec: Cannot load native-lzo without native-hadoop | |
2023-05-17 16:27:54,108 INFO mapred.FileInputFormat: Total input files to process : 2 | |
2023-05-17 16:27:54,184 INFO spark.SparkContext: Starting job: isEmpty at DeltaSync.java:545 | |
2023-05-17 16:27:54,200 INFO scheduler.DAGScheduler: Got job 0 (isEmpty at DeltaSync.java:545) with 1 output partitions | |
2023-05-17 16:27:54,201 INFO scheduler.DAGScheduler: Final stage: ResultStage 0 (isEmpty at DeltaSync.java:545) | |
2023-05-17 16:27:54,201 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:27:54,203 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:27:54,210 INFO scheduler.DAGScheduler: Submitting ResultStage 0 (MapPartitionsRDD[2] at map at SourceFormatAdapter.java:67), which has no missing parents | |
2023-05-17 16:27:54,238 INFO memory.MemoryStore: Block broadcast_1 stored as values in memory (estimated size 5.8 KiB, free 365.8 MiB) | |
2023-05-17 16:27:54,246 INFO memory.MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 3.2 KiB, free 365.8 MiB) | |
2023-05-17 16:27:54,247 INFO storage.BlockManagerInfo: Added broadcast_1_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 3.2 KiB, free: 366.3 MiB) | |
2023-05-17 16:27:54,248 INFO spark.SparkContext: Created broadcast 1 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:27:54,282 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 0 (MapPartitionsRDD[2] at map at SourceFormatAdapter.java:67) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:27:54,283 INFO scheduler.TaskSchedulerImpl: Adding task set 0.0 with 1 tasks resource profile 0 | |
2023-05-17 16:27:54,356 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 0.0 (TID 0) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, ANY, 4555 bytes) taskResourceAssignments Map() | |
2023-05-17 16:27:54,387 INFO executor.Executor: Running task 0.0 in stage 0.0 (TID 0) | |
2023-05-17 16:27:54,793 INFO rdd.HadoopRDD: Input split: hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/source_parquet/batch_1.json:0+759994 | |
2023-05-17 16:27:54,903 INFO executor.Executor: Finished task 0.0 in stage 0.0 (TID 0). 1316 bytes result sent to driver | |
2023-05-17 16:27:54,931 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 0.0 (TID 0) in 600 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:27:54,941 INFO scheduler.DAGScheduler: ResultStage 0 (isEmpty at DeltaSync.java:545) finished in 0.706 s | |
2023-05-17 16:27:54,951 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool | |
2023-05-17 16:27:54,964 INFO scheduler.DAGScheduler: Job 0 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:27:54,965 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 0: Stage finished | |
2023-05-17 16:27:54,967 INFO scheduler.DAGScheduler: Job 0 finished: isEmpty at DeltaSync.java:545, took 0.782023 s | |
2023-05-17 16:27:54,994 INFO deltastreamer.DeltaSync: Setting up new Hoodie Write Client | |
2023-05-17 16:27:55,022 INFO embedded.EmbeddedTimelineService: Starting Timeline service !! | |
2023-05-17 16:27:55,024 INFO embedded.EmbeddedTimelineService: Overriding hostIp to (ip-172-31-19-77.us-east-2.compute.internal) found in spark-conf. It was null | |
2023-05-17 16:27:55,039 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:27:55,040 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:27:55,065 INFO util.log: Logging initialized @6487ms to org.apache.hudi.org.eclipse.jetty.util.log.Slf4jLog | |
2023-05-17 16:27:55,285 INFO javalin.Javalin: | |
__ __ _ __ __ | |
/ /____ _ _ __ ____ _ / /(_)____ / // / | |
__ / // __ `/| | / // __ `// // // __ \ / // /_ | |
/ /_/ // /_/ / | |/ // /_/ // // // / / / /__ __/ | |
\____/ \__,_/ |___/ \__,_//_//_//_/ /_/ /_/ | |
https://javalin.io/documentation | |
2023-05-17 16:27:55,288 INFO javalin.Javalin: Starting Javalin ... | |
2023-05-17 16:27:55,295 INFO javalin.Javalin: You are running Javalin 4.6.7 (released October 24, 2022. Your Javalin version is 205 days old. Consider checking for a newer version.). | |
2023-05-17 16:27:55,420 INFO storage.BlockManagerInfo: Removed broadcast_1_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 3.2 KiB, free: 366.3 MiB) | |
2023-05-17 16:27:55,492 INFO server.Server: jetty-9.4.48.v20220622; built: 2022-06-21T20:42:25.880Z; git: 6b67c5719d1f4371b33655ff2d047d24e171e49a; jvm 1.8.0_372-b07 | |
2023-05-17 16:27:55,650 INFO server.Server: Started @7072ms | |
2023-05-17 16:27:55,650 INFO javalin.Javalin: Listening on http://localhost:38649/ | |
2023-05-17 16:27:55,651 INFO javalin.Javalin: Javalin started in 364ms \o/ | |
2023-05-17 16:27:55,651 INFO service.TimelineService: Starting Timeline server on port :38649 | |
2023-05-17 16:27:55,651 INFO embedded.EmbeddedTimelineService: Started embedded timeline server at ip-172-31-19-77.us-east-2.compute.internal:38649 | |
2023-05-17 16:27:55,677 INFO client.BaseHoodieClient: Timeline Server already running. Not restarting the service | |
2023-05-17 16:27:55,707 INFO client.BaseHoodieClient: Timeline Server already running. Not restarting the service | |
2023-05-17 16:27:55,728 INFO spark.SparkContext: Starting job: isEmpty at DeltaSync.java:665 | |
2023-05-17 16:27:55,731 INFO scheduler.DAGScheduler: Got job 1 (isEmpty at DeltaSync.java:665) with 1 output partitions | |
2023-05-17 16:27:55,731 INFO scheduler.DAGScheduler: Final stage: ResultStage 1 (isEmpty at DeltaSync.java:665) | |
2023-05-17 16:27:55,731 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:27:55,731 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:27:55,733 INFO scheduler.DAGScheduler: Submitting ResultStage 1 (MapPartitionsRDD[3] at map at DeltaSync.java:558), which has no missing parents | |
2023-05-17 16:27:55,737 INFO memory.MemoryStore: Block broadcast_2 stored as values in memory (estimated size 12.2 KiB, free 365.8 MiB) | |
2023-05-17 16:27:55,743 INFO memory.MemoryStore: Block broadcast_2_piece0 stored as bytes in memory (estimated size 6.4 KiB, free 365.8 MiB) | |
2023-05-17 16:27:55,745 INFO storage.BlockManagerInfo: Added broadcast_2_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 6.4 KiB, free: 366.2 MiB) | |
2023-05-17 16:27:55,746 INFO spark.SparkContext: Created broadcast 2 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:27:55,747 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 1 (MapPartitionsRDD[3] at map at DeltaSync.java:558) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:27:55,747 INFO scheduler.TaskSchedulerImpl: Adding task set 1.0 with 1 tasks resource profile 0 | |
2023-05-17 16:27:55,749 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 1.0 (TID 1) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, ANY, 4555 bytes) taskResourceAssignments Map() | |
2023-05-17 16:27:55,749 INFO executor.Executor: Running task 0.0 in stage 1.0 (TID 1) | |
2023-05-17 16:27:55,762 INFO rdd.HadoopRDD: Input split: hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/source_parquet/batch_1.json:0+759994 | |
2023-05-17 16:27:55,826 INFO executor.Executor: Finished task 0.0 in stage 1.0 (TID 1). 1072 bytes result sent to driver | |
2023-05-17 16:27:55,858 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 1.0 (TID 1) in 110 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:27:55,860 INFO scheduler.DAGScheduler: ResultStage 1 (isEmpty at DeltaSync.java:665) finished in 0.125 s | |
2023-05-17 16:27:55,862 INFO scheduler.DAGScheduler: Job 1 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:27:55,862 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool | |
2023-05-17 16:27:55,862 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 1: Stage finished | |
2023-05-17 16:27:55,865 INFO scheduler.DAGScheduler: Job 1 finished: isEmpty at DeltaSync.java:665, took 0.135272 s | |
2023-05-17 16:27:55,871 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:27:55,878 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:27:55,888 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:27:55,889 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:27:55,896 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
2023-05-17 16:27:55,905 INFO util.CleanerUtils: Cleaned failed attempts if any | |
2023-05-17 16:27:55,911 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:27:55,917 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:27:55,924 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:27:55,924 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:27:55,931 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
2023-05-17 16:27:55,942 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:27:55,945 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:27:55,948 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:27:55,950 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:55,953 WARN metadata.HoodieBackedTableMetadata: Metadata table was not found at path /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:55,954 INFO view.FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
2023-05-17 16:27:55,954 INFO view.FileSystemViewManager: Creating remote first table view | |
2023-05-17 16:27:55,966 INFO client.BaseHoodieWriteClient: Generate a new instant time: 20230517162755867 action: commit | |
2023-05-17 16:27:55,967 INFO timeline.HoodieActiveTimeline: Creating a new instant [==>20230517162755867__commit__REQUESTED] | |
2023-05-17 16:27:55,975 INFO deltastreamer.DeltaSync: Starting commit : 20230517162755867 | |
2023-05-17 16:27:55,976 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:27:55,978 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:27:55,981 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:27:55,981 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:27:55,984 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517162755867__commit__REQUESTED]} | |
2023-05-17 16:27:56,124 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:27:56,129 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:27:56,134 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:27:56,167 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517162755867__commit__REQUESTED]} | |
2023-05-17 16:27:56,168 INFO metadata.HoodieBackedTableMetadataWriter: Creating a new metadata table in /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata at instant 00000000000000 | |
2023-05-17 16:27:56,168 INFO table.HoodieTableMetaClient: Initializing /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata as hoodie table /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:56,669 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:56,672 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:27:56,676 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:56,677 INFO table.HoodieTableMetaClient: Finished initializing Table of type MERGE_ON_READ from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:56,678 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:27:56,680 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:27:56,684 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:27:56,684 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:56,689 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:27:56,693 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:56,700 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
2023-05-17 16:27:56,713 INFO view.AbstractTableFileSystemView: Took 4 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:27:56,741 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:27:56,743 INFO metadata.HoodieBackedTableMetadataWriter: Creating 1 file groups for partition files with base fileId files- at instant time 00000000000000 | |
2023-05-17 16:27:56,748 INFO log.HoodieLogFormat$WriterBuilder: Building HoodieLogFormat Writer | |
2023-05-17 16:27:56,748 INFO log.HoodieLogFormat$WriterBuilder: HoodieLogFile on path /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0 | |
2023-05-17 16:27:56,753 INFO log.HoodieLogFormatWriter: HoodieLogFile{pathStr='/tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0', fileLen=0} does not exist. Create a new file | |
2023-05-17 16:27:56,806 INFO metadata.HoodieBackedTableMetadataWriter: Initializing metadata table by using file listings in /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:27:56,806 INFO metadata.HoodieBackedTableMetadataWriter: Triggering empty Commit to metadata to initialize | |
2023-05-17 16:27:56,810 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:27:56,811 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:27:56,812 INFO client.BaseHoodieClient: Embedded Timeline Server is disabled. Not starting timeline service | |
2023-05-17 16:27:56,814 INFO client.BaseHoodieClient: Embedded Timeline Server is disabled. Not starting timeline service | |
2023-05-17 16:27:56,814 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:56,820 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:27:56,830 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:56,830 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:56,832 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
2023-05-17 16:27:56,837 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:27:56,837 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:27:56,839 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:56,842 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:27:56,844 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:56,844 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:56,847 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
2023-05-17 16:27:56,848 INFO util.CleanerUtils: Cleaned failed attempts if any | |
2023-05-17 16:27:56,848 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:56,851 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:27:56,856 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:56,856 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:56,858 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
2023-05-17 16:27:56,859 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:27:56,859 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:27:56,860 INFO client.BaseHoodieWriteClient: Generate a new instant time: 00000000000000 action: deltacommit | |
2023-05-17 16:27:56,860 INFO timeline.HoodieActiveTimeline: Creating a new instant [==>00000000000000__deltacommit__REQUESTED] | |
2023-05-17 16:27:56,872 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:56,874 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:27:56,877 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:56,877 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:56,881 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[==>00000000000000__deltacommit__REQUESTED]} | |
2023-05-17 16:27:56,882 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:27:56,882 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:27:56,889 INFO async.AsyncCleanerService: The HoodieWriteClient is not configured to auto & async clean. Async clean service will not start. | |
2023-05-17 16:27:56,890 INFO async.AsyncArchiveService: The HoodieWriteClient is not configured to auto & async archive. Async archive service will not start. | |
2023-05-17 16:27:56,904 INFO view.AbstractTableFileSystemView: Took 1 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:27:56,904 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:27:56,971 INFO spark.SparkContext: Starting job: countByKey at HoodieJavaPairRDD.java:105 | |
2023-05-17 16:27:56,981 INFO scheduler.DAGScheduler: Job 2 finished: countByKey at HoodieJavaPairRDD.java:105, took 0.010169 s | |
2023-05-17 16:27:56,989 INFO commit.BaseSparkCommitActionExecutor: Input workload profile :WorkloadProfile {globalStat=WorkloadStat {numInserts=0, numUpdates=0}, InputPartitionStat={}, OutputPartitionStat={}, operationType=UPSERT_PREPPED} | |
2023-05-17 16:27:56,990 INFO commit.UpsertPartitioner: AvgRecordSize => 1024 | |
2023-05-17 16:27:56,991 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:27:56,992 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:27:56,993 INFO commit.UpsertPartitioner: Total Buckets :0, buckets info => {}, | |
Partition to insert buckets => {}, | |
UpdateLocations mapped to buckets =>{} | |
2023-05-17 16:27:57,011 INFO timeline.HoodieActiveTimeline: Checking for file exists ?/tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/00000000000000.deltacommit.requested | |
2023-05-17 16:27:57,033 INFO util.FileIOUtils: Created a new file in meta path: /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/00000000000000.deltacommit.inflight | |
2023-05-17 16:27:57,460 INFO timeline.HoodieActiveTimeline: Create new file for toInstant ?/tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/00000000000000.deltacommit.inflight | |
2023-05-17 16:27:57,632 INFO commit.BaseSparkCommitActionExecutor: no validators configured. | |
2023-05-17 16:27:57,632 INFO commit.BaseCommitActionExecutor: Auto commit enabled: Committing 00000000000000 | |
2023-05-17 16:27:57,765 INFO spark.SparkContext: Starting job: collect at HoodieJavaRDD.java:163 | |
2023-05-17 16:27:57,768 INFO scheduler.DAGScheduler: Job 3 finished: collect at HoodieJavaRDD.java:163, took 0.000563 s | |
2023-05-17 16:27:57,769 INFO util.CommitUtils: Creating metadata for UPSERT_PREPPED numWriteStats:0 numReplaceFileIds:0 | |
2023-05-17 16:27:57,829 INFO storage.BlockManagerInfo: Removed broadcast_2_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 6.4 KiB, free: 366.3 MiB) | |
2023-05-17 16:27:57,974 INFO spark.SparkContext: Starting job: collect at HoodieJavaRDD.java:163 | |
2023-05-17 16:27:57,976 INFO scheduler.DAGScheduler: Job 4 finished: collect at HoodieJavaRDD.java:163, took 0.000445 s | |
2023-05-17 16:27:57,976 INFO commit.BaseSparkCommitActionExecutor: Committing 00000000000000, action Type deltacommit, operation Type UPSERT_PREPPED | |
2023-05-17 16:27:57,986 INFO timeline.HoodieActiveTimeline: Marking instant complete [==>00000000000000__deltacommit__INFLIGHT] | |
2023-05-17 16:27:57,986 INFO timeline.HoodieActiveTimeline: Checking for file exists ?/tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/00000000000000.deltacommit.inflight | |
2023-05-17 16:27:58,414 INFO timeline.HoodieActiveTimeline: Create new file for toInstant ?/tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/00000000000000.deltacommit | |
2023-05-17 16:27:58,414 INFO timeline.HoodieActiveTimeline: Completed [==>00000000000000__deltacommit__INFLIGHT] | |
2023-05-17 16:27:58,414 INFO commit.BaseSparkCommitActionExecutor: Committed 00000000000000 | |
2023-05-17 16:27:58,415 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:58,416 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:27:58,419 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:58,419 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:58,420 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:27:58,420 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:27:58,421 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:27:58,502 INFO spark.SparkContext: Starting job: collect at SparkHoodieBackedTableMetadataWriter.java:185 | |
2023-05-17 16:27:58,503 INFO scheduler.DAGScheduler: Job 5 finished: collect at SparkHoodieBackedTableMetadataWriter.java:185, took 0.000330 s | |
2023-05-17 16:27:58,505 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:27:58,954 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:27:58,956 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:27:58,957 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:27:58,958 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:58,959 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:27:58,960 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:58,962 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:27:58,962 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:27:58,963 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:27:58,963 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:27:58,968 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:27:58,970 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:27:58,970 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:58,971 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:27:58,973 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:27:58,974 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:27:58,975 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:27:58,976 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:27:58,976 INFO view.FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
2023-05-17 16:27:58,976 INFO view.FileSystemViewManager: Creating remote first table view | |
2023-05-17 16:27:58,976 INFO async.AsyncCleanerService: The HoodieWriteClient is not configured to auto & async clean. Async clean service will not start. | |
2023-05-17 16:27:58,977 INFO async.AsyncArchiveService: The HoodieWriteClient is not configured to auto & async archive. Async archive service will not start. | |
2023-05-17 16:27:59,016 INFO spark.SparkContext: Starting job: countByKey at HoodieJavaPairRDD.java:105 | |
2023-05-17 16:27:59,020 INFO scheduler.DAGScheduler: Registering RDD 15 (mapToPair at HoodieJavaRDD.java:135) as input to shuffle 3 | |
2023-05-17 16:27:59,022 INFO scheduler.DAGScheduler: Registering RDD 19 (countByKey at HoodieJavaPairRDD.java:105) as input to shuffle 2 | |
2023-05-17 16:27:59,023 INFO scheduler.DAGScheduler: Got job 6 (countByKey at HoodieJavaPairRDD.java:105) with 2 output partitions | |
2023-05-17 16:27:59,023 INFO scheduler.DAGScheduler: Final stage: ResultStage 4 (countByKey at HoodieJavaPairRDD.java:105) | |
2023-05-17 16:27:59,023 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 3) | |
2023-05-17 16:27:59,024 INFO scheduler.DAGScheduler: Missing parents: List(ShuffleMapStage 3) | |
2023-05-17 16:27:59,028 INFO scheduler.DAGScheduler: Submitting ShuffleMapStage 2 (MapPartitionsRDD[15] at mapToPair at HoodieJavaRDD.java:135), which has no missing parents | |
2023-05-17 16:27:59,049 INFO memory.MemoryStore: Block broadcast_3 stored as values in memory (estimated size 31.3 KiB, free 365.8 MiB) | |
2023-05-17 16:27:59,050 INFO memory.MemoryStore: Block broadcast_3_piece0 stored as bytes in memory (estimated size 14.8 KiB, free 365.8 MiB) | |
2023-05-17 16:27:59,051 INFO storage.BlockManagerInfo: Added broadcast_3_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 14.8 KiB, free: 366.2 MiB) | |
2023-05-17 16:27:59,051 INFO spark.SparkContext: Created broadcast 3 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:27:59,053 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 2 (MapPartitionsRDD[15] at mapToPair at HoodieJavaRDD.java:135) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:27:59,053 INFO scheduler.TaskSchedulerImpl: Adding task set 2.0 with 2 tasks resource profile 0 | |
2023-05-17 16:27:59,055 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 2.0 (TID 2) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, ANY, 4544 bytes) taskResourceAssignments Map() | |
2023-05-17 16:27:59,056 INFO executor.Executor: Running task 0.0 in stage 2.0 (TID 2) | |
2023-05-17 16:27:59,105 INFO rdd.HadoopRDD: Input split: hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/source_parquet/batch_1.json:0+759994 | |
2023-05-17 16:27:59,403 INFO executor.Executor: Finished task 0.0 in stage 2.0 (TID 2). 1202 bytes result sent to driver | |
2023-05-17 16:27:59,404 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 2.0 (TID 3) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, ANY, 4544 bytes) taskResourceAssignments Map() | |
2023-05-17 16:27:59,405 INFO executor.Executor: Running task 1.0 in stage 2.0 (TID 3) | |
2023-05-17 16:27:59,412 INFO rdd.HadoopRDD: Input split: hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/source_parquet/batch_2.json:0+363815 | |
2023-05-17 16:27:59,418 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 2.0 (TID 2) in 364 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:27:59,500 INFO executor.Executor: Finished task 1.0 in stage 2.0 (TID 3). 1202 bytes result sent to driver | |
2023-05-17 16:27:59,502 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 2.0 (TID 3) in 98 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:27:59,503 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 2.0, whose tasks have all completed, from pool | |
2023-05-17 16:27:59,503 INFO scheduler.DAGScheduler: ShuffleMapStage 2 (mapToPair at HoodieJavaRDD.java:135) finished in 0.471 s | |
2023-05-17 16:27:59,504 INFO scheduler.DAGScheduler: looking for newly runnable stages | |
2023-05-17 16:27:59,505 INFO scheduler.DAGScheduler: running: Set() | |
2023-05-17 16:27:59,505 INFO scheduler.DAGScheduler: waiting: Set(ShuffleMapStage 3, ResultStage 4) | |
2023-05-17 16:27:59,506 INFO scheduler.DAGScheduler: failed: Set() | |
2023-05-17 16:27:59,510 INFO scheduler.DAGScheduler: Submitting ShuffleMapStage 3 (MapPartitionsRDD[19] at countByKey at HoodieJavaPairRDD.java:105), which has no missing parents | |
2023-05-17 16:27:59,522 INFO memory.MemoryStore: Block broadcast_4 stored as values in memory (estimated size 25.4 KiB, free 365.8 MiB) | |
2023-05-17 16:27:59,524 INFO memory.MemoryStore: Block broadcast_4_piece0 stored as bytes in memory (estimated size 12.2 KiB, free 365.8 MiB) | |
2023-05-17 16:27:59,525 INFO storage.BlockManagerInfo: Added broadcast_4_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 12.2 KiB, free: 366.2 MiB) | |
2023-05-17 16:27:59,525 INFO spark.SparkContext: Created broadcast 4 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:27:59,526 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 3 (MapPartitionsRDD[19] at countByKey at HoodieJavaPairRDD.java:105) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:27:59,526 INFO scheduler.TaskSchedulerImpl: Adding task set 3.0 with 2 tasks resource profile 0 | |
2023-05-17 16:27:59,530 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 3.0 (TID 4) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, NODE_LOCAL, 4260 bytes) taskResourceAssignments Map() | |
2023-05-17 16:27:59,530 INFO executor.Executor: Running task 0.0 in stage 3.0 (TID 4) | |
2023-05-17 16:27:59,564 INFO storage.ShuffleBlockFetcherIterator: Getting 2 (7.1 KiB) non-empty blocks including 2 (7.1 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:27:59,565 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 8 ms | |
2023-05-17 16:27:59,606 INFO memory.MemoryStore: Block rdd_17_0 stored as values in memory (estimated size 8.2 KiB, free 365.8 MiB) | |
2023-05-17 16:27:59,607 INFO storage.BlockManagerInfo: Added rdd_17_0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 8.2 KiB, free: 366.2 MiB) | |
2023-05-17 16:27:59,616 INFO executor.Executor: Finished task 0.0 in stage 3.0 (TID 4). 1417 bytes result sent to driver | |
2023-05-17 16:27:59,617 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 3.0 (TID 5) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, NODE_LOCAL, 4260 bytes) taskResourceAssignments Map() | |
2023-05-17 16:27:59,618 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 3.0 (TID 4) in 90 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:27:59,618 INFO executor.Executor: Running task 1.0 in stage 3.0 (TID 5) | |
2023-05-17 16:27:59,624 INFO storage.ShuffleBlockFetcherIterator: Getting 2 (4.8 KiB) non-empty blocks including 2 (4.8 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:27:59,624 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:27:59,640 INFO memory.MemoryStore: Block rdd_17_1 stored as values in memory (estimated size 5.8 KiB, free 365.8 MiB) | |
2023-05-17 16:27:59,641 INFO storage.BlockManagerInfo: Added rdd_17_1 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 5.8 KiB, free: 366.2 MiB) | |
2023-05-17 16:27:59,645 INFO executor.Executor: Finished task 1.0 in stage 3.0 (TID 5). 1417 bytes result sent to driver | |
2023-05-17 16:27:59,648 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 3.0 (TID 5) in 31 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:27:59,648 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 3.0, whose tasks have all completed, from pool | |
2023-05-17 16:27:59,649 INFO scheduler.DAGScheduler: ShuffleMapStage 3 (countByKey at HoodieJavaPairRDD.java:105) finished in 0.134 s | |
2023-05-17 16:27:59,649 INFO scheduler.DAGScheduler: looking for newly runnable stages | |
2023-05-17 16:27:59,649 INFO scheduler.DAGScheduler: running: Set() | |
2023-05-17 16:27:59,650 INFO scheduler.DAGScheduler: waiting: Set(ResultStage 4) | |
2023-05-17 16:27:59,650 INFO scheduler.DAGScheduler: failed: Set() | |
2023-05-17 16:27:59,650 INFO scheduler.DAGScheduler: Submitting ResultStage 4 (ShuffledRDD[20] at countByKey at HoodieJavaPairRDD.java:105), which has no missing parents | |
2023-05-17 16:27:59,653 INFO memory.MemoryStore: Block broadcast_5 stored as values in memory (estimated size 4.6 KiB, free 365.7 MiB) | |
2023-05-17 16:27:59,654 INFO memory.MemoryStore: Block broadcast_5_piece0 stored as bytes in memory (estimated size 2.6 KiB, free 365.7 MiB) | |
2023-05-17 16:27:59,655 INFO storage.BlockManagerInfo: Added broadcast_5_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 2.6 KiB, free: 366.2 MiB) | |
2023-05-17 16:27:59,655 INFO spark.SparkContext: Created broadcast 5 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:27:59,655 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ResultStage 4 (ShuffledRDD[20] at countByKey at HoodieJavaPairRDD.java:105) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:27:59,656 INFO scheduler.TaskSchedulerImpl: Adding task set 4.0 with 2 tasks resource profile 0 | |
2023-05-17 16:27:59,657 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 4.0 (TID 6) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, NODE_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:27:59,661 INFO executor.Executor: Running task 1.0 in stage 4.0 (TID 6) | |
2023-05-17 16:27:59,665 INFO storage.ShuffleBlockFetcherIterator: Getting 2 (120.0 B) non-empty blocks including 2 (120.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:27:59,665 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:27:59,671 INFO executor.Executor: Finished task 1.0 in stage 4.0 (TID 6). 1244 bytes result sent to driver | |
2023-05-17 16:27:59,673 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 4.0 (TID 7) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:27:59,673 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 4.0 (TID 6) in 16 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:27:59,676 INFO executor.Executor: Running task 0.0 in stage 4.0 (TID 7) | |
2023-05-17 16:27:59,681 INFO storage.ShuffleBlockFetcherIterator: Getting 0 (0.0 B) non-empty blocks including 0 (0.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:27:59,682 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:27:59,683 INFO executor.Executor: Finished task 0.0 in stage 4.0 (TID 7). 1227 bytes result sent to driver | |
2023-05-17 16:27:59,684 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 4.0 (TID 7) in 12 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:27:59,684 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 4.0, whose tasks have all completed, from pool | |
2023-05-17 16:27:59,685 INFO scheduler.DAGScheduler: ResultStage 4 (countByKey at HoodieJavaPairRDD.java:105) finished in 0.034 s | |
2023-05-17 16:27:59,685 INFO scheduler.DAGScheduler: Job 6 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:27:59,686 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 4: Stage finished | |
2023-05-17 16:27:59,686 INFO scheduler.DAGScheduler: Job 6 finished: countByKey at HoodieJavaPairRDD.java:105, took 0.670010 s | |
2023-05-17 16:27:59,862 INFO spark.SparkContext: Starting job: collect at HoodieSparkEngineContext.java:137 | |
2023-05-17 16:27:59,862 INFO scheduler.DAGScheduler: Got job 7 (collect at HoodieSparkEngineContext.java:137) with 1 output partitions | |
2023-05-17 16:27:59,863 INFO scheduler.DAGScheduler: Final stage: ResultStage 5 (collect at HoodieSparkEngineContext.java:137) | |
2023-05-17 16:27:59,863 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:27:59,863 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:27:59,864 INFO scheduler.DAGScheduler: Submitting ResultStage 5 (MapPartitionsRDD[22] at flatMap at HoodieSparkEngineContext.java:137), which has no missing parents | |
2023-05-17 16:27:59,904 INFO memory.MemoryStore: Block broadcast_6 stored as values in memory (estimated size 559.0 KiB, free 365.2 MiB) | |
2023-05-17 16:27:59,908 INFO memory.MemoryStore: Block broadcast_6_piece0 stored as bytes in memory (estimated size 209.7 KiB, free 365.0 MiB) | |
2023-05-17 16:27:59,909 INFO storage.BlockManagerInfo: Added broadcast_6_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 209.7 KiB, free: 366.0 MiB) | |
2023-05-17 16:27:59,910 INFO spark.SparkContext: Created broadcast 6 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:27:59,910 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 5 (MapPartitionsRDD[22] at flatMap at HoodieSparkEngineContext.java:137) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:27:59,911 INFO scheduler.TaskSchedulerImpl: Adding task set 5.0 with 1 tasks resource profile 0 | |
2023-05-17 16:27:59,915 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 5.0 (TID 8) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4344 bytes) taskResourceAssignments Map() | |
2023-05-17 16:27:59,916 INFO executor.Executor: Running task 0.0 in stage 5.0 (TID 8) | |
2023-05-17 16:27:59,991 INFO executor.Executor: Finished task 0.0 in stage 5.0 (TID 8). 797 bytes result sent to driver | |
2023-05-17 16:27:59,992 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 5.0 (TID 8) in 80 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:27:59,992 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 5.0, whose tasks have all completed, from pool | |
2023-05-17 16:27:59,993 INFO scheduler.DAGScheduler: ResultStage 5 (collect at HoodieSparkEngineContext.java:137) finished in 0.128 s | |
2023-05-17 16:27:59,993 INFO scheduler.DAGScheduler: Job 7 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:27:59,993 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 5: Stage finished | |
2023-05-17 16:27:59,994 INFO scheduler.DAGScheduler: Job 7 finished: collect at HoodieSparkEngineContext.java:137, took 0.132020 s | |
2023-05-17 16:28:00,138 INFO storage.BlockManager: Removing RDD 12 | |
2023-05-17 16:28:00,151 INFO storage.BlockManagerInfo: Removed broadcast_3_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 14.8 KiB, free: 366.0 MiB) | |
2023-05-17 16:28:00,162 INFO storage.BlockManagerInfo: Removed broadcast_6_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 209.7 KiB, free: 366.2 MiB) | |
2023-05-17 16:28:00,178 INFO storage.BlockManagerInfo: Removed broadcast_4_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 12.2 KiB, free: 366.2 MiB) | |
2023-05-17 16:28:00,189 INFO storage.BlockManager: Removing RDD 4 | |
2023-05-17 16:28:00,211 INFO storage.BlockManagerInfo: Removed broadcast_5_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 2.6 KiB, free: 366.2 MiB) | |
2023-05-17 16:28:00,211 INFO spark.SparkContext: Starting job: collect at HoodieSparkEngineContext.java:103 | |
2023-05-17 16:28:00,212 INFO scheduler.DAGScheduler: Got job 8 (collect at HoodieSparkEngineContext.java:103) with 1 output partitions | |
2023-05-17 16:28:00,212 INFO scheduler.DAGScheduler: Final stage: ResultStage 6 (collect at HoodieSparkEngineContext.java:103) | |
2023-05-17 16:28:00,212 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:28:00,213 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:28:00,218 INFO scheduler.DAGScheduler: Submitting ResultStage 6 (MapPartitionsRDD[24] at map at HoodieSparkEngineContext.java:103), which has no missing parents | |
2023-05-17 16:28:00,260 INFO memory.MemoryStore: Block broadcast_7 stored as values in memory (estimated size 558.8 KiB, free 365.3 MiB) | |
2023-05-17 16:28:00,264 INFO memory.MemoryStore: Block broadcast_7_piece0 stored as bytes in memory (estimated size 210.0 KiB, free 365.1 MiB) | |
2023-05-17 16:28:00,264 INFO storage.BlockManagerInfo: Added broadcast_7_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 210.0 KiB, free: 366.0 MiB) | |
2023-05-17 16:28:00,265 INFO spark.SparkContext: Created broadcast 7 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:28:00,266 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 6 (MapPartitionsRDD[24] at map at HoodieSparkEngineContext.java:103) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:28:00,266 INFO scheduler.TaskSchedulerImpl: Adding task set 6.0 with 1 tasks resource profile 0 | |
2023-05-17 16:28:00,267 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 6.0 (TID 9) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4332 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:00,268 INFO executor.Executor: Running task 0.0 in stage 6.0 (TID 9) | |
2023-05-17 16:28:00,310 INFO executor.Executor: Finished task 0.0 in stage 6.0 (TID 9). 797 bytes result sent to driver | |
2023-05-17 16:28:00,311 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 6.0 (TID 9) in 44 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:28:00,311 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 6.0, whose tasks have all completed, from pool | |
2023-05-17 16:28:00,312 INFO scheduler.DAGScheduler: ResultStage 6 (collect at HoodieSparkEngineContext.java:103) finished in 0.093 s | |
2023-05-17 16:28:00,313 INFO scheduler.DAGScheduler: Job 8 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:28:00,313 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 6: Stage finished | |
2023-05-17 16:28:00,314 INFO scheduler.DAGScheduler: Job 8 finished: collect at HoodieSparkEngineContext.java:103, took 0.102585 s | |
2023-05-17 16:28:00,328 INFO bloom.SparkHoodieBloomIndexHelper: Input parallelism: 2, Index parallelism: 2 | |
2023-05-17 16:28:00,344 INFO spark.SparkContext: Starting job: countByKey at SparkHoodieBloomIndexHelper.java:195 | |
2023-05-17 16:28:00,346 INFO scheduler.DAGScheduler: Registering RDD 27 (countByKey at SparkHoodieBloomIndexHelper.java:195) as input to shuffle 4 | |
2023-05-17 16:28:00,347 INFO scheduler.DAGScheduler: Got job 9 (countByKey at SparkHoodieBloomIndexHelper.java:195) with 2 output partitions | |
2023-05-17 16:28:00,347 INFO scheduler.DAGScheduler: Final stage: ResultStage 9 (countByKey at SparkHoodieBloomIndexHelper.java:195) | |
2023-05-17 16:28:00,347 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 8) | |
2023-05-17 16:28:00,347 INFO scheduler.DAGScheduler: Missing parents: List(ShuffleMapStage 8) | |
2023-05-17 16:28:00,352 INFO scheduler.DAGScheduler: Submitting ShuffleMapStage 8 (MapPartitionsRDD[27] at countByKey at SparkHoodieBloomIndexHelper.java:195), which has no missing parents | |
2023-05-17 16:28:00,357 INFO memory.MemoryStore: Block broadcast_8 stored as values in memory (estimated size 27.0 KiB, free 365.1 MiB) | |
2023-05-17 16:28:00,359 INFO memory.MemoryStore: Block broadcast_8_piece0 stored as bytes in memory (estimated size 12.8 KiB, free 365.0 MiB) | |
2023-05-17 16:28:00,359 INFO storage.BlockManagerInfo: Added broadcast_8_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 12.8 KiB, free: 366.0 MiB) | |
2023-05-17 16:28:00,360 INFO spark.SparkContext: Created broadcast 8 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:28:00,360 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 8 (MapPartitionsRDD[27] at countByKey at SparkHoodieBloomIndexHelper.java:195) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:28:00,360 INFO scheduler.TaskSchedulerImpl: Adding task set 8.0 with 2 tasks resource profile 0 | |
2023-05-17 16:28:00,363 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 8.0 (TID 10) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4260 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:00,365 INFO executor.Executor: Running task 0.0 in stage 8.0 (TID 10) | |
2023-05-17 16:28:00,373 INFO storage.BlockManager: Found block rdd_17_0 locally | |
2023-05-17 16:28:00,380 INFO executor.Executor: Finished task 0.0 in stage 8.0 (TID 10). 1116 bytes result sent to driver | |
2023-05-17 16:28:00,381 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 8.0 (TID 11) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, PROCESS_LOCAL, 4260 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:00,382 INFO executor.Executor: Running task 1.0 in stage 8.0 (TID 11) | |
2023-05-17 16:28:00,382 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 8.0 (TID 10) in 20 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:28:00,388 INFO storage.BlockManager: Found block rdd_17_1 locally | |
2023-05-17 16:28:00,398 INFO executor.Executor: Finished task 1.0 in stage 8.0 (TID 11). 1116 bytes result sent to driver | |
2023-05-17 16:28:00,400 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 8.0 (TID 11) in 20 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:28:00,400 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 8.0, whose tasks have all completed, from pool | |
2023-05-17 16:28:00,402 INFO scheduler.DAGScheduler: ShuffleMapStage 8 (countByKey at SparkHoodieBloomIndexHelper.java:195) finished in 0.047 s | |
2023-05-17 16:28:00,402 INFO scheduler.DAGScheduler: looking for newly runnable stages | |
2023-05-17 16:28:00,402 INFO scheduler.DAGScheduler: running: Set() | |
2023-05-17 16:28:00,403 INFO scheduler.DAGScheduler: waiting: Set(ResultStage 9) | |
2023-05-17 16:28:00,403 INFO scheduler.DAGScheduler: failed: Set() | |
2023-05-17 16:28:00,403 INFO scheduler.DAGScheduler: Submitting ResultStage 9 (ShuffledRDD[28] at countByKey at SparkHoodieBloomIndexHelper.java:195), which has no missing parents | |
2023-05-17 16:28:00,405 INFO memory.MemoryStore: Block broadcast_9 stored as values in memory (estimated size 4.6 KiB, free 365.0 MiB) | |
2023-05-17 16:28:00,408 INFO memory.MemoryStore: Block broadcast_9_piece0 stored as bytes in memory (estimated size 2.6 KiB, free 365.0 MiB) | |
2023-05-17 16:28:00,408 INFO storage.BlockManagerInfo: Added broadcast_9_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 2.6 KiB, free: 366.0 MiB) | |
2023-05-17 16:28:00,409 INFO spark.SparkContext: Created broadcast 9 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:28:00,409 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ResultStage 9 (ShuffledRDD[28] at countByKey at SparkHoodieBloomIndexHelper.java:195) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:28:00,409 INFO scheduler.TaskSchedulerImpl: Adding task set 9.0 with 2 tasks resource profile 0 | |
2023-05-17 16:28:00,411 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 9.0 (TID 12) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:00,411 INFO executor.Executor: Running task 0.0 in stage 9.0 (TID 12) | |
2023-05-17 16:28:00,414 INFO storage.ShuffleBlockFetcherIterator: Getting 0 (0.0 B) non-empty blocks including 0 (0.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:28:00,414 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:28:00,416 INFO executor.Executor: Finished task 0.0 in stage 9.0 (TID 12). 1227 bytes result sent to driver | |
2023-05-17 16:28:00,416 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 9.0 (TID 13) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:00,417 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 9.0 (TID 12) in 7 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:28:00,417 INFO executor.Executor: Running task 1.0 in stage 9.0 (TID 13) | |
2023-05-17 16:28:00,419 INFO storage.ShuffleBlockFetcherIterator: Getting 0 (0.0 B) non-empty blocks including 0 (0.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:28:00,420 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:28:00,421 INFO executor.Executor: Finished task 1.0 in stage 9.0 (TID 13). 1227 bytes result sent to driver | |
2023-05-17 16:28:00,422 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 9.0 (TID 13) in 6 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:28:00,422 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 9.0, whose tasks have all completed, from pool | |
2023-05-17 16:28:00,423 INFO scheduler.DAGScheduler: ResultStage 9 (countByKey at SparkHoodieBloomIndexHelper.java:195) finished in 0.019 s | |
2023-05-17 16:28:00,424 INFO scheduler.DAGScheduler: Job 9 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:28:00,424 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 9: Stage finished | |
2023-05-17 16:28:00,425 INFO scheduler.DAGScheduler: Job 9 finished: countByKey at SparkHoodieBloomIndexHelper.java:195, took 0.080529 s | |
2023-05-17 16:28:00,427 INFO bloom.BucketizedBloomCheckPartitioner: TotalBuckets 0, min_buckets/partition 1 | |
2023-05-17 16:28:00,515 INFO rdd.MapPartitionsRDD: Removing RDD 17 from persistence list | |
2023-05-17 16:28:00,521 INFO rdd.MapPartitionsRDD: Removing RDD 35 from persistence list | |
2023-05-17 16:28:00,522 INFO storage.BlockManager: Removing RDD 17 | |
2023-05-17 16:28:00,522 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:28:00,523 INFO storage.BlockManager: Removing RDD 35 | |
2023-05-17 16:28:00,524 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:28:00,545 INFO spark.SparkContext: Starting job: countByKey at HoodieJavaPairRDD.java:105 | |
2023-05-17 16:28:00,548 INFO scheduler.DAGScheduler: Registering RDD 29 (mapToPair at SparkHoodieBloomIndexHelper.java:164) as input to shuffle 8 | |
2023-05-17 16:28:00,548 INFO scheduler.DAGScheduler: Registering RDD 36 (mapToPair at HoodieJavaRDD.java:135) as input to shuffle 5 | |
2023-05-17 16:28:00,549 INFO scheduler.DAGScheduler: Registering RDD 35 (flatMapToPair at SparkHoodieBloomIndexHelper.java:175) as input to shuffle 6 | |
2023-05-17 16:28:00,550 INFO scheduler.DAGScheduler: Registering RDD 45 (countByKey at HoodieJavaPairRDD.java:105) as input to shuffle 7 | |
2023-05-17 16:28:00,550 INFO scheduler.DAGScheduler: Got job 10 (countByKey at HoodieJavaPairRDD.java:105) with 2 output partitions | |
2023-05-17 16:28:00,550 INFO scheduler.DAGScheduler: Final stage: ResultStage 15 (countByKey at HoodieJavaPairRDD.java:105) | |
2023-05-17 16:28:00,550 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 14) | |
2023-05-17 16:28:00,550 INFO scheduler.DAGScheduler: Missing parents: List(ShuffleMapStage 14) | |
2023-05-17 16:28:00,557 INFO scheduler.DAGScheduler: Submitting ShuffleMapStage 12 (MapPartitionsRDD[36] at mapToPair at HoodieJavaRDD.java:135), which has no missing parents | |
2023-05-17 16:28:00,560 INFO memory.MemoryStore: Block broadcast_10 stored as values in memory (estimated size 25.1 KiB, free 365.0 MiB) | |
2023-05-17 16:28:00,562 INFO memory.MemoryStore: Block broadcast_10_piece0 stored as bytes in memory (estimated size 12.1 KiB, free 365.0 MiB) | |
2023-05-17 16:28:00,563 INFO storage.BlockManagerInfo: Added broadcast_10_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 12.1 KiB, free: 366.0 MiB) | |
2023-05-17 16:28:00,564 INFO spark.SparkContext: Created broadcast 10 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:28:00,564 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 12 (MapPartitionsRDD[36] at mapToPair at HoodieJavaRDD.java:135) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:28:00,565 INFO scheduler.TaskSchedulerImpl: Adding task set 12.0 with 2 tasks resource profile 0 | |
2023-05-17 16:28:00,566 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 12.0 (TID 14) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, NODE_LOCAL, 4260 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:00,566 INFO executor.Executor: Running task 0.0 in stage 12.0 (TID 14) | |
2023-05-17 16:28:00,571 INFO storage.ShuffleBlockFetcherIterator: Getting 2 (7.1 KiB) non-empty blocks including 2 (7.1 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:28:00,571 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:28:00,599 INFO executor.Executor: Finished task 0.0 in stage 12.0 (TID 14). 1417 bytes result sent to driver | |
2023-05-17 16:28:00,599 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 12.0 (TID 15) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, NODE_LOCAL, 4260 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:00,600 INFO executor.Executor: Running task 1.0 in stage 12.0 (TID 15) | |
2023-05-17 16:28:00,600 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 12.0 (TID 14) in 34 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:28:00,606 INFO storage.ShuffleBlockFetcherIterator: Getting 2 (4.8 KiB) non-empty blocks including 2 (4.8 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:28:00,606 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:28:00,622 INFO executor.Executor: Finished task 1.0 in stage 12.0 (TID 15). 1417 bytes result sent to driver | |
2023-05-17 16:28:00,624 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 12.0 (TID 15) in 25 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:28:00,625 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 12.0, whose tasks have all completed, from pool | |
2023-05-17 16:28:00,625 INFO scheduler.DAGScheduler: ShuffleMapStage 12 (mapToPair at HoodieJavaRDD.java:135) finished in 0.067 s | |
2023-05-17 16:28:00,626 INFO scheduler.DAGScheduler: looking for newly runnable stages | |
2023-05-17 16:28:00,626 INFO scheduler.DAGScheduler: running: Set() | |
2023-05-17 16:28:00,626 INFO scheduler.DAGScheduler: waiting: Set(ResultStage 15, ShuffleMapStage 14) | |
2023-05-17 16:28:00,626 INFO scheduler.DAGScheduler: failed: Set() | |
2023-05-17 16:28:00,627 INFO scheduler.DAGScheduler: Submitting ShuffleMapStage 14 (MapPartitionsRDD[45] at countByKey at HoodieJavaPairRDD.java:105), which has no missing parents | |
2023-05-17 16:28:00,634 INFO memory.MemoryStore: Block broadcast_11 stored as values in memory (estimated size 9.9 KiB, free 365.0 MiB) | |
2023-05-17 16:28:00,636 INFO memory.MemoryStore: Block broadcast_11_piece0 stored as bytes in memory (estimated size 5.1 KiB, free 365.0 MiB) | |
2023-05-17 16:28:00,636 INFO storage.BlockManagerInfo: Added broadcast_11_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 5.1 KiB, free: 366.0 MiB) | |
2023-05-17 16:28:00,640 INFO spark.SparkContext: Created broadcast 11 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:28:00,640 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 14 (MapPartitionsRDD[45] at countByKey at HoodieJavaPairRDD.java:105) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:28:00,640 INFO scheduler.TaskSchedulerImpl: Adding task set 14.0 with 2 tasks resource profile 0 | |
2023-05-17 16:28:00,642 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 14.0 (TID 16) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4323 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:00,643 INFO executor.Executor: Running task 0.0 in stage 14.0 (TID 16) | |
2023-05-17 16:28:00,656 INFO storage.ShuffleBlockFetcherIterator: Getting 1 (3.5 KiB) non-empty blocks including 1 (3.5 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:28:00,656 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:28:00,657 INFO storage.ShuffleBlockFetcherIterator: Getting 0 (0.0 B) non-empty blocks including 0 (0.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:28:00,657 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:28:00,682 INFO memory.MemoryStore: Block rdd_43_0 stored as values in memory (estimated size 8.2 KiB, free 365.0 MiB) | |
2023-05-17 16:28:00,683 INFO storage.BlockManagerInfo: Added rdd_43_0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 8.2 KiB, free: 366.0 MiB) | |
2023-05-17 16:28:00,687 INFO executor.Executor: Finished task 0.0 in stage 14.0 (TID 16). 1417 bytes result sent to driver | |
2023-05-17 16:28:00,688 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 14.0 (TID 17) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, PROCESS_LOCAL, 4323 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:00,688 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 14.0 (TID 16) in 47 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:28:00,689 INFO executor.Executor: Running task 1.0 in stage 14.0 (TID 17) | |
2023-05-17 16:28:00,695 INFO storage.ShuffleBlockFetcherIterator: Getting 1 (2.4 KiB) non-empty blocks including 1 (2.4 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:28:00,695 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 1 ms | |
2023-05-17 16:28:00,697 INFO storage.ShuffleBlockFetcherIterator: Getting 0 (0.0 B) non-empty blocks including 0 (0.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:28:00,697 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:28:00,712 INFO memory.MemoryStore: Block rdd_43_1 stored as values in memory (estimated size 5.8 KiB, free 365.0 MiB) | |
2023-05-17 16:28:00,713 INFO storage.BlockManagerInfo: Added rdd_43_1 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 5.8 KiB, free: 366.0 MiB) | |
2023-05-17 16:28:00,716 INFO executor.Executor: Finished task 1.0 in stage 14.0 (TID 17). 1417 bytes result sent to driver | |
2023-05-17 16:28:00,717 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 14.0 (TID 17) in 29 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:28:00,718 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 14.0, whose tasks have all completed, from pool | |
2023-05-17 16:28:00,718 INFO scheduler.DAGScheduler: ShuffleMapStage 14 (countByKey at HoodieJavaPairRDD.java:105) finished in 0.090 s | |
2023-05-17 16:28:00,719 INFO scheduler.DAGScheduler: looking for newly runnable stages | |
2023-05-17 16:28:00,719 INFO scheduler.DAGScheduler: running: Set() | |
2023-05-17 16:28:00,719 INFO scheduler.DAGScheduler: waiting: Set(ResultStage 15) | |
2023-05-17 16:28:00,719 INFO scheduler.DAGScheduler: failed: Set() | |
2023-05-17 16:28:00,719 INFO scheduler.DAGScheduler: Submitting ResultStage 15 (ShuffledRDD[46] at countByKey at HoodieJavaPairRDD.java:105), which has no missing parents | |
2023-05-17 16:28:00,724 INFO memory.MemoryStore: Block broadcast_12 stored as values in memory (estimated size 4.6 KiB, free 365.0 MiB) | |
2023-05-17 16:28:00,726 INFO memory.MemoryStore: Block broadcast_12_piece0 stored as bytes in memory (estimated size 2.6 KiB, free 365.0 MiB) | |
2023-05-17 16:28:00,726 INFO storage.BlockManagerInfo: Added broadcast_12_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 2.6 KiB, free: 366.0 MiB) | |
2023-05-17 16:28:00,727 INFO spark.SparkContext: Created broadcast 12 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:28:00,728 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ResultStage 15 (ShuffledRDD[46] at countByKey at HoodieJavaPairRDD.java:105) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:28:00,728 INFO scheduler.TaskSchedulerImpl: Adding task set 15.0 with 2 tasks resource profile 0 | |
2023-05-17 16:28:00,729 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 15.0 (TID 18) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, NODE_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:00,731 INFO executor.Executor: Running task 1.0 in stage 15.0 (TID 18) | |
2023-05-17 16:28:00,733 INFO storage.ShuffleBlockFetcherIterator: Getting 2 (194.0 B) non-empty blocks including 2 (194.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:28:00,733 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:28:00,742 INFO executor.Executor: Finished task 1.0 in stage 15.0 (TID 18). 1284 bytes result sent to driver | |
2023-05-17 16:28:00,743 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 15.0 (TID 19) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:00,743 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 15.0 (TID 18) in 14 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:28:00,744 INFO executor.Executor: Running task 0.0 in stage 15.0 (TID 19) | |
2023-05-17 16:28:00,746 INFO storage.ShuffleBlockFetcherIterator: Getting 0 (0.0 B) non-empty blocks including 0 (0.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:28:00,746 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:28:00,747 INFO executor.Executor: Finished task 0.0 in stage 15.0 (TID 19). 1227 bytes result sent to driver | |
2023-05-17 16:28:00,750 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 15.0 (TID 19) in 7 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:28:00,750 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 15.0, whose tasks have all completed, from pool | |
2023-05-17 16:28:00,751 INFO scheduler.DAGScheduler: ResultStage 15 (countByKey at HoodieJavaPairRDD.java:105) finished in 0.030 s | |
2023-05-17 16:28:00,752 INFO scheduler.DAGScheduler: Job 10 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:28:00,752 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 15: Stage finished | |
2023-05-17 16:28:00,752 INFO scheduler.DAGScheduler: Job 10 finished: countByKey at HoodieJavaPairRDD.java:105, took 0.207430 s | |
2023-05-17 16:28:00,754 INFO commit.BaseSparkCommitActionExecutor: Input workload profile :WorkloadProfile {globalStat=WorkloadStat {numInserts=99, numUpdates=0}, InputPartitionStat={2018/08/31=WorkloadStat {numInserts=99, numUpdates=0}}, OutputPartitionStat={}, operationType=UPSERT} | |
2023-05-17 16:28:00,754 INFO commit.UpsertPartitioner: AvgRecordSize => 1024 | |
2023-05-17 16:28:00,890 INFO spark.SparkContext: Starting job: collectAsMap at UpsertPartitioner.java:279 | |
2023-05-17 16:28:00,891 INFO scheduler.DAGScheduler: Got job 11 (collectAsMap at UpsertPartitioner.java:279) with 1 output partitions | |
2023-05-17 16:28:00,891 INFO scheduler.DAGScheduler: Final stage: ResultStage 16 (collectAsMap at UpsertPartitioner.java:279) | |
2023-05-17 16:28:00,891 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:28:00,892 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:28:00,892 INFO scheduler.DAGScheduler: Submitting ResultStage 16 (MapPartitionsRDD[48] at mapToPair at UpsertPartitioner.java:278), which has no missing parents | |
2023-05-17 16:28:00,952 INFO memory.MemoryStore: Block broadcast_13 stored as values in memory (estimated size 559.7 KiB, free 364.4 MiB) | |
2023-05-17 16:28:00,956 INFO memory.MemoryStore: Block broadcast_13_piece0 stored as bytes in memory (estimated size 209.9 KiB, free 364.2 MiB) | |
2023-05-17 16:28:00,956 INFO storage.BlockManagerInfo: Added broadcast_13_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 209.9 KiB, free: 365.8 MiB) | |
2023-05-17 16:28:00,957 INFO spark.SparkContext: Created broadcast 13 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:28:00,957 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 16 (MapPartitionsRDD[48] at mapToPair at UpsertPartitioner.java:278) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:28:00,958 INFO scheduler.TaskSchedulerImpl: Adding task set 16.0 with 1 tasks resource profile 0 | |
2023-05-17 16:28:00,959 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 16.0 (TID 20) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4344 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:00,959 INFO executor.Executor: Running task 0.0 in stage 16.0 (TID 20) | |
2023-05-17 16:28:00,999 INFO executor.Executor: Finished task 0.0 in stage 16.0 (TID 20). 834 bytes result sent to driver | |
2023-05-17 16:28:01,003 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 16.0 (TID 20) in 44 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:28:01,003 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 16.0, whose tasks have all completed, from pool | |
2023-05-17 16:28:01,006 INFO scheduler.DAGScheduler: ResultStage 16 (collectAsMap at UpsertPartitioner.java:279) finished in 0.113 s | |
2023-05-17 16:28:01,006 INFO scheduler.DAGScheduler: Job 11 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:28:01,007 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 16: Stage finished | |
2023-05-17 16:28:01,007 INFO scheduler.DAGScheduler: Job 11 finished: collectAsMap at UpsertPartitioner.java:279, took 0.116434 s | |
2023-05-17 16:28:01,008 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:28:01,010 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:28:01,010 INFO commit.UpsertPartitioner: For partitionPath : 2018/08/31 Small Files => [] | |
2023-05-17 16:28:01,010 INFO commit.UpsertPartitioner: After small file assignment: unassignedInserts => 99, totalInsertBuckets => 1, recordsPerBucket => 122880 | |
2023-05-17 16:28:01,015 INFO commit.UpsertPartitioner: Total insert buckets for partition path 2018/08/31 => [(InsertBucket {bucketNumber=0, weight=1.0},1.0)] | |
2023-05-17 16:28:01,015 INFO commit.UpsertPartitioner: Total Buckets :1, buckets info => {0=BucketInfo {bucketType=INSERT, fileIdPrefix=2028dec8-f9de-44ba-937e-d7743d78ebaf, partitionPath=2018/08/31}}, | |
Partition to insert buckets => {2018/08/31=[(InsertBucket {bucketNumber=0, weight=1.0},1.0)]}, | |
UpdateLocations mapped to buckets =>{} | |
2023-05-17 16:28:01,036 INFO timeline.HoodieActiveTimeline: Checking for file exists ?/tmp/deltastreamertest/stocks20230517t162744/.hoodie/20230517162755867.commit.requested | |
2023-05-17 16:28:01,466 INFO timeline.HoodieActiveTimeline: Create new file for toInstant ?/tmp/deltastreamertest/stocks20230517t162744/.hoodie/20230517162755867.inflight | |
2023-05-17 16:28:01,512 INFO commit.BaseSparkCommitActionExecutor: no validators configured. | |
2023-05-17 16:28:01,512 INFO commit.BaseCommitActionExecutor: Auto commit disabled for 20230517162755867 | |
2023-05-17 16:28:01,527 INFO spark.SparkContext: Starting job: sum at DeltaSync.java:696 | |
2023-05-17 16:28:01,530 INFO scheduler.DAGScheduler: Registering RDD 49 (mapToPair at HoodieJavaRDD.java:135) as input to shuffle 9 | |
2023-05-17 16:28:01,530 INFO scheduler.DAGScheduler: Got job 12 (sum at DeltaSync.java:696) with 1 output partitions | |
2023-05-17 16:28:01,530 INFO scheduler.DAGScheduler: Final stage: ResultStage 22 (sum at DeltaSync.java:696) | |
2023-05-17 16:28:01,530 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 21) | |
2023-05-17 16:28:01,531 INFO scheduler.DAGScheduler: Missing parents: List(ShuffleMapStage 21) | |
2023-05-17 16:28:01,535 INFO scheduler.DAGScheduler: Submitting ShuffleMapStage 21 (MapPartitionsRDD[49] at mapToPair at HoodieJavaRDD.java:135), which has no missing parents | |
2023-05-17 16:28:01,582 INFO memory.MemoryStore: Block broadcast_14 stored as values in memory (estimated size 565.7 KiB, free 363.7 MiB) | |
2023-05-17 16:28:01,585 INFO memory.MemoryStore: Block broadcast_14_piece0 stored as bytes in memory (estimated size 212.7 KiB, free 363.5 MiB) | |
2023-05-17 16:28:01,586 INFO storage.BlockManagerInfo: Added broadcast_14_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 212.7 KiB, free: 365.6 MiB) | |
2023-05-17 16:28:01,586 INFO spark.SparkContext: Created broadcast 14 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:28:01,587 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 21 (MapPartitionsRDD[49] at mapToPair at HoodieJavaRDD.java:135) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:28:01,587 INFO scheduler.TaskSchedulerImpl: Adding task set 21.0 with 2 tasks resource profile 0 | |
2023-05-17 16:28:01,588 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 21.0 (TID 21) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4323 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:01,589 INFO executor.Executor: Running task 0.0 in stage 21.0 (TID 21) | |
2023-05-17 16:28:01,613 INFO storage.BlockManager: Found block rdd_43_0 locally | |
2023-05-17 16:28:01,624 INFO executor.Executor: Finished task 0.0 in stage 21.0 (TID 21). 1072 bytes result sent to driver | |
2023-05-17 16:28:01,625 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 21.0 (TID 22) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, PROCESS_LOCAL, 4323 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:01,625 INFO executor.Executor: Running task 1.0 in stage 21.0 (TID 22) | |
2023-05-17 16:28:01,626 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 21.0 (TID 21) in 38 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:28:01,652 INFO storage.BlockManager: Found block rdd_43_1 locally | |
2023-05-17 16:28:01,661 INFO executor.Executor: Finished task 1.0 in stage 21.0 (TID 22). 1072 bytes result sent to driver | |
2023-05-17 16:28:01,662 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 21.0 (TID 22) in 37 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:28:01,662 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 21.0, whose tasks have all completed, from pool | |
2023-05-17 16:28:01,662 INFO scheduler.DAGScheduler: ShuffleMapStage 21 (mapToPair at HoodieJavaRDD.java:135) finished in 0.126 s | |
2023-05-17 16:28:01,662 INFO scheduler.DAGScheduler: looking for newly runnable stages | |
2023-05-17 16:28:01,662 INFO scheduler.DAGScheduler: running: Set() | |
2023-05-17 16:28:01,662 INFO scheduler.DAGScheduler: waiting: Set(ResultStage 22) | |
2023-05-17 16:28:01,662 INFO scheduler.DAGScheduler: failed: Set() | |
2023-05-17 16:28:01,663 INFO scheduler.DAGScheduler: Submitting ResultStage 22 (MapPartitionsRDD[54] at mapToDouble at DeltaSync.java:696), which has no missing parents | |
2023-05-17 16:28:01,706 INFO memory.MemoryStore: Block broadcast_15 stored as values in memory (estimated size 573.2 KiB, free 362.9 MiB) | |
2023-05-17 16:28:01,713 INFO memory.MemoryStore: Block broadcast_15_piece0 stored as bytes in memory (estimated size 216.7 KiB, free 362.7 MiB) | |
2023-05-17 16:28:01,713 INFO storage.BlockManagerInfo: Added broadcast_15_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 216.7 KiB, free: 365.4 MiB) | |
2023-05-17 16:28:01,715 INFO spark.SparkContext: Created broadcast 15 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:28:01,715 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 22 (MapPartitionsRDD[54] at mapToDouble at DeltaSync.java:696) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:28:01,715 INFO scheduler.TaskSchedulerImpl: Adding task set 22.0 with 1 tasks resource profile 0 | |
2023-05-17 16:28:01,717 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 22.0 (TID 23) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, NODE_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:01,717 INFO executor.Executor: Running task 0.0 in stage 22.0 (TID 23) | |
2023-05-17 16:28:01,805 INFO storage.BlockManagerInfo: Removed broadcast_14_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 212.7 KiB, free: 365.6 MiB) | |
2023-05-17 16:28:01,840 INFO storage.BlockManagerInfo: Removed broadcast_8_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 12.8 KiB, free: 365.6 MiB) | |
2023-05-17 16:28:01,841 INFO storage.ShuffleBlockFetcherIterator: Getting 2 (6.2 KiB) non-empty blocks including 2 (6.2 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:28:01,841 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:28:01,856 INFO storage.BlockManagerInfo: Removed broadcast_13_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 209.9 KiB, free: 365.8 MiB) | |
2023-05-17 16:28:01,862 INFO storage.BlockManagerInfo: Removed broadcast_11_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 5.1 KiB, free: 365.8 MiB) | |
2023-05-17 16:28:01,864 INFO queue.SimpleExecutor: Starting consumer, consuming records from the records iterator directly | |
2023-05-17 16:28:01,865 INFO storage.BlockManagerInfo: Removed broadcast_10_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 12.1 KiB, free: 365.8 MiB) | |
2023-05-17 16:28:01,867 INFO storage.BlockManagerInfo: Removed broadcast_12_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 2.6 KiB, free: 365.8 MiB) | |
2023-05-17 16:28:01,875 INFO storage.BlockManagerInfo: Removed broadcast_7_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 210.0 KiB, free: 366.0 MiB) | |
2023-05-17 16:28:01,887 INFO storage.BlockManagerInfo: Removed broadcast_9_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 2.6 KiB, free: 366.0 MiB) | |
2023-05-17 16:28:01,915 WARN marker.WriteMarkersFactory: Timeline-server-based markers are not supported for HDFS: base path /tmp/deltastreamertest/stocks20230517t162744. Falling back to direct markers. | |
2023-05-17 16:28:01,920 INFO marker.DirectWriteMarkers: Creating Marker Path=/tmp/deltastreamertest/stocks20230517t162744/.hoodie/.temp/20230517162755867/2018/08/31/2028dec8-f9de-44ba-937e-d7743d78ebaf-0_0-22-23_20230517162755867.parquet.marker.CREATE | |
2023-05-17 16:28:01,924 INFO marker.DirectWriteMarkers: [direct] Created marker file /tmp/deltastreamertest/stocks20230517t162744/.hoodie/.temp/20230517162755867/2018/08/31/2028dec8-f9de-44ba-937e-d7743d78ebaf-0_0-22-23_20230517162755867.parquet.marker.CREATE in 8 ms | |
2023-05-17 16:28:02,002 INFO compress.CodecPool: Got brand-new compressor [.gz] | |
2023-05-17 16:28:02,337 INFO io.HoodieCreateHandle: New CreateHandle for partition :2018/08/31 with fileId 2028dec8-f9de-44ba-937e-d7743d78ebaf-0 | |
2023-05-17 16:28:02,396 INFO io.HoodieCreateHandle: Closing the file 2028dec8-f9de-44ba-937e-d7743d78ebaf-0 as we are done with all the records 99 | |
2023-05-17 16:28:02,397 INFO hadoop.InternalParquetRecordWriter: Flushing mem columnStore to file. allocated memory: 17352 | |
2023-05-17 16:28:02,518 INFO io.HoodieCreateHandle: CreateHandle for partitionPath 2018/08/31 fileID 2028dec8-f9de-44ba-937e-d7743d78ebaf-0, took 650 ms. | |
2023-05-17 16:28:02,521 INFO memory.MemoryStore: Block rdd_53_0 stored as values in memory (estimated size 376.0 B, free 365.1 MiB) | |
2023-05-17 16:28:02,521 INFO storage.BlockManagerInfo: Added rdd_53_0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 376.0 B, free: 366.0 MiB) | |
2023-05-17 16:28:02,529 INFO executor.Executor: Finished task 0.0 in stage 22.0 (TID 23). 1146 bytes result sent to driver | |
2023-05-17 16:28:02,530 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 22.0 (TID 23) in 813 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:28:02,530 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 22.0, whose tasks have all completed, from pool | |
2023-05-17 16:28:02,531 INFO scheduler.DAGScheduler: ResultStage 22 (sum at DeltaSync.java:696) finished in 0.867 s | |
2023-05-17 16:28:02,532 INFO scheduler.DAGScheduler: Job 12 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:28:02,532 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 22: Stage finished | |
2023-05-17 16:28:02,532 INFO scheduler.DAGScheduler: Job 12 finished: sum at DeltaSync.java:696, took 1.004675 s | |
2023-05-17 16:28:02,540 INFO spark.SparkContext: Starting job: sum at DeltaSync.java:697 | |
2023-05-17 16:28:02,543 INFO scheduler.DAGScheduler: Got job 13 (sum at DeltaSync.java:697) with 1 output partitions | |
2023-05-17 16:28:02,543 INFO scheduler.DAGScheduler: Final stage: ResultStage 28 (sum at DeltaSync.java:697) | |
2023-05-17 16:28:02,544 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 27) | |
2023-05-17 16:28:02,544 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:28:02,549 INFO scheduler.DAGScheduler: Submitting ResultStage 28 (MapPartitionsRDD[56] at mapToDouble at DeltaSync.java:697), which has no missing parents | |
2023-05-17 16:28:02,609 INFO memory.MemoryStore: Block broadcast_16 stored as values in memory (estimated size 573.2 KiB, free 364.5 MiB) | |
2023-05-17 16:28:02,613 INFO memory.MemoryStore: Block broadcast_16_piece0 stored as bytes in memory (estimated size 216.7 KiB, free 364.3 MiB) | |
2023-05-17 16:28:02,614 INFO storage.BlockManagerInfo: Added broadcast_16_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 216.7 KiB, free: 365.8 MiB) | |
2023-05-17 16:28:02,614 INFO spark.SparkContext: Created broadcast 16 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:28:02,615 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 28 (MapPartitionsRDD[56] at mapToDouble at DeltaSync.java:697) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:28:02,615 INFO scheduler.TaskSchedulerImpl: Adding task set 28.0 with 1 tasks resource profile 0 | |
2023-05-17 16:28:02,616 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 28.0 (TID 24) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:02,617 INFO executor.Executor: Running task 0.0 in stage 28.0 (TID 24) | |
2023-05-17 16:28:02,664 INFO storage.BlockManager: Found block rdd_53_0 locally | |
2023-05-17 16:28:02,665 INFO executor.Executor: Finished task 0.0 in stage 28.0 (TID 24). 845 bytes result sent to driver | |
2023-05-17 16:28:02,666 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 28.0 (TID 24) in 50 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:28:02,666 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 28.0, whose tasks have all completed, from pool | |
2023-05-17 16:28:02,666 INFO scheduler.DAGScheduler: ResultStage 28 (sum at DeltaSync.java:697) finished in 0.116 s | |
2023-05-17 16:28:02,667 INFO scheduler.DAGScheduler: Job 13 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:28:02,667 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 28: Stage finished | |
2023-05-17 16:28:02,667 INFO scheduler.DAGScheduler: Job 13 finished: sum at DeltaSync.java:697, took 0.126364 s | |
2023-05-17 16:28:02,751 INFO spark.SparkContext: Starting job: collect at SparkRDDWriteClient.java:101 | |
2023-05-17 16:28:02,753 INFO scheduler.DAGScheduler: Got job 14 (collect at SparkRDDWriteClient.java:101) with 1 output partitions | |
2023-05-17 16:28:02,753 INFO scheduler.DAGScheduler: Final stage: ResultStage 34 (collect at SparkRDDWriteClient.java:101) | |
2023-05-17 16:28:02,753 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 33) | |
2023-05-17 16:28:02,754 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:28:02,754 INFO scheduler.DAGScheduler: Submitting ResultStage 34 (MapPartitionsRDD[58] at map at SparkRDDWriteClient.java:101), which has no missing parents | |
2023-05-17 16:28:02,790 INFO memory.MemoryStore: Block broadcast_17 stored as values in memory (estimated size 573.3 KiB, free 363.7 MiB) | |
2023-05-17 16:28:02,793 INFO memory.MemoryStore: Block broadcast_17_piece0 stored as bytes in memory (estimated size 216.8 KiB, free 363.5 MiB) | |
2023-05-17 16:28:02,793 INFO storage.BlockManagerInfo: Added broadcast_17_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 216.8 KiB, free: 365.6 MiB) | |
2023-05-17 16:28:02,794 INFO spark.SparkContext: Created broadcast 17 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:28:02,794 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 34 (MapPartitionsRDD[58] at map at SparkRDDWriteClient.java:101) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:28:02,794 INFO scheduler.TaskSchedulerImpl: Adding task set 34.0 with 1 tasks resource profile 0 | |
2023-05-17 16:28:02,795 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 34.0 (TID 25) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:02,795 INFO executor.Executor: Running task 0.0 in stage 34.0 (TID 25) | |
2023-05-17 16:28:02,819 INFO storage.BlockManager: Found block rdd_53_0 locally | |
2023-05-17 16:28:02,820 INFO executor.Executor: Finished task 0.0 in stage 34.0 (TID 25). 1162 bytes result sent to driver | |
2023-05-17 16:28:02,821 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 34.0 (TID 25) in 26 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:28:02,821 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 34.0, whose tasks have all completed, from pool | |
2023-05-17 16:28:02,822 INFO scheduler.DAGScheduler: ResultStage 34 (collect at SparkRDDWriteClient.java:101) finished in 0.067 s | |
2023-05-17 16:28:02,822 INFO scheduler.DAGScheduler: Job 14 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:28:02,822 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 34: Stage finished | |
2023-05-17 16:28:02,823 INFO scheduler.DAGScheduler: Job 14 finished: collect at SparkRDDWriteClient.java:101, took 0.071526 s | |
2023-05-17 16:28:02,823 INFO client.BaseHoodieWriteClient: Committing 20230517162755867 action commit | |
2023-05-17 16:28:02,823 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:02,825 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:28:02,828 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:02,828 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:02,830 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517162755867__commit__INFLIGHT]} | |
2023-05-17 16:28:02,830 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:02,832 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:28:02,837 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:02,837 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:02,838 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:28:02,840 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:02,842 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:28:02,842 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:28:02,843 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:28:02,843 INFO view.FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
2023-05-17 16:28:02,843 INFO view.FileSystemViewManager: Creating remote first table view | |
2023-05-17 16:28:02,843 INFO util.CommitUtils: Creating metadata for UPSERT numWriteStats:1 numReplaceFileIds:0 | |
2023-05-17 16:28:02,846 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:02,847 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:28:02,849 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:02,849 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:02,850 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517162755867__commit__INFLIGHT]} | |
2023-05-17 16:28:02,851 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:02,852 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:28:02,854 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:02,854 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:02,855 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:28:02,857 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:02,858 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:28:02,859 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:28:02,859 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:28:02,860 INFO view.FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
2023-05-17 16:28:02,860 INFO view.FileSystemViewManager: Creating remote first table view | |
2023-05-17 16:28:02,860 INFO client.BaseHoodieWriteClient: Committing 20230517162755867 action commit | |
2023-05-17 16:28:02,860 WARN marker.WriteMarkersFactory: Timeline-server-based markers are not supported for HDFS: base path /tmp/deltastreamertest/stocks20230517t162744. Falling back to direct markers. | |
2023-05-17 16:28:02,907 INFO spark.SparkContext: Starting job: collect at HoodieSparkEngineContext.java:137 | |
2023-05-17 16:28:02,908 INFO scheduler.DAGScheduler: Got job 15 (collect at HoodieSparkEngineContext.java:137) with 1 output partitions | |
2023-05-17 16:28:02,908 INFO scheduler.DAGScheduler: Final stage: ResultStage 35 (collect at HoodieSparkEngineContext.java:137) | |
2023-05-17 16:28:02,908 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:28:02,908 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:28:02,909 INFO scheduler.DAGScheduler: Submitting ResultStage 35 (MapPartitionsRDD[60] at flatMap at HoodieSparkEngineContext.java:137), which has no missing parents | |
2023-05-17 16:28:02,923 INFO memory.MemoryStore: Block broadcast_18 stored as values in memory (estimated size 128.1 KiB, free 363.4 MiB) | |
2023-05-17 16:28:02,925 INFO memory.MemoryStore: Block broadcast_18_piece0 stored as bytes in memory (estimated size 48.2 KiB, free 363.3 MiB) | |
2023-05-17 16:28:02,926 INFO storage.BlockManagerInfo: Added broadcast_18_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 48.2 KiB, free: 365.6 MiB) | |
2023-05-17 16:28:02,926 INFO spark.SparkContext: Created broadcast 18 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:28:02,927 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 35 (MapPartitionsRDD[60] at flatMap at HoodieSparkEngineContext.java:137) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:28:02,927 INFO scheduler.TaskSchedulerImpl: Adding task set 35.0 with 1 tasks resource profile 0 | |
2023-05-17 16:28:02,928 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 35.0 (TID 26) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4471 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:02,928 INFO executor.Executor: Running task 0.0 in stage 35.0 (TID 26) | |
2023-05-17 16:28:02,943 INFO executor.Executor: Finished task 0.0 in stage 35.0 (TID 26). 884 bytes result sent to driver | |
2023-05-17 16:28:02,943 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 35.0 (TID 26) in 16 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:28:02,943 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 35.0, whose tasks have all completed, from pool | |
2023-05-17 16:28:02,944 INFO scheduler.DAGScheduler: ResultStage 35 (collect at HoodieSparkEngineContext.java:137) finished in 0.035 s | |
2023-05-17 16:28:02,944 INFO scheduler.DAGScheduler: Job 15 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:28:02,944 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 35: Stage finished | |
2023-05-17 16:28:02,944 INFO scheduler.DAGScheduler: Job 15 finished: collect at HoodieSparkEngineContext.java:137, took 0.036682 s | |
2023-05-17 16:28:02,948 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:02,950 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:28:02,953 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:02,954 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:02,955 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:28:02,958 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:02,959 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:28:02,960 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:28:02,960 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:28:02,960 INFO metadata.HoodieTableMetadataUtil: Loading latest file slices for metadata table partition files | |
2023-05-17 16:28:02,961 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:28:02,962 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:28:02,962 INFO view.AbstractTableFileSystemView: Building file system view for partition (files) | |
2023-05-17 16:28:02,967 INFO view.AbstractTableFileSystemView: addFilesToView: NumFiles=1, NumFileGroups=1, FileGroupsCreationTime=3, StoreTimeTaken=1 | |
2023-05-17 16:28:02,969 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:02,971 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:28:02,973 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:02,974 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:28:02,975 INFO metadata.HoodieBackedTableMetadataWriter: Async metadata indexing enabled and following partitions already initialized: [files] | |
2023-05-17 16:28:02,975 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:02,976 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:28:02,980 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:02,980 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:02,981 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:28:02,983 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:02,985 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:28:02,985 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:28:02,986 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:28:03,019 INFO metadata.HoodieTableMetadataUtil: Updating at 20230517162755867 from Commit/UPSERT. #partitions_updated=2 | |
2023-05-17 16:28:03,021 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:28:03,021 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:28:03,021 INFO metadata.HoodieTableMetadataUtil: Loading latest file slices for metadata table partition files | |
2023-05-17 16:28:03,022 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:28:03,022 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:28:03,022 INFO view.AbstractTableFileSystemView: Building file system view for partition (files) | |
2023-05-17 16:28:03,024 INFO view.AbstractTableFileSystemView: addFilesToView: NumFiles=1, NumFileGroups=1, FileGroupsCreationTime=1, StoreTimeTaken=0 | |
2023-05-17 16:28:03,037 INFO client.BaseHoodieClient: Embedded Timeline Server is disabled. Not starting timeline service | |
2023-05-17 16:28:03,038 INFO client.BaseHoodieClient: Embedded Timeline Server is disabled. Not starting timeline service | |
2023-05-17 16:28:03,038 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:03,040 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:28:03,042 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:03,042 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:03,043 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:28:03,044 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:28:03,044 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:28:03,045 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:03,046 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:28:03,047 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:03,047 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:03,049 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:28:03,049 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:28:03,049 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:28:03,051 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:28:03,053 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517162755867__commit__INFLIGHT]} | |
2023-05-17 16:28:03,055 INFO client.BaseHoodieWriteClient: Scheduling table service COMPACT | |
2023-05-17 16:28:03,055 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:03,056 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:28:03,058 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:03,058 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:03,059 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:28:03,059 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:28:03,060 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:28:03,061 INFO client.BaseHoodieWriteClient: Scheduling compaction at instant time :00000000000000001 | |
2023-05-17 16:28:03,069 INFO compact.ScheduleCompactionActionExecutor: Checking if compaction needs to be run on /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:03,074 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:03,076 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:28:03,078 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:03,078 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:03,079 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:28:03,079 INFO util.CleanerUtils: Cleaned failed attempts if any | |
2023-05-17 16:28:03,079 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:03,080 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:28:03,082 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:03,082 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:03,083 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:28:03,084 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:28:03,084 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:28:03,084 INFO client.BaseHoodieWriteClient: Generate a new instant time: 20230517162755867 action: deltacommit | |
2023-05-17 16:28:03,084 INFO timeline.HoodieActiveTimeline: Creating a new instant [==>20230517162755867__deltacommit__REQUESTED] | |
2023-05-17 16:28:03,094 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:03,095 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:28:03,097 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:03,097 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:03,098 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517162755867__deltacommit__REQUESTED]} | |
2023-05-17 16:28:03,099 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:28:03,099 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:28:03,138 INFO async.AsyncCleanerService: The HoodieWriteClient is not configured to auto & async clean. Async clean service will not start. | |
2023-05-17 16:28:03,138 INFO async.AsyncArchiveService: The HoodieWriteClient is not configured to auto & async archive. Async archive service will not start. | |
2023-05-17 16:28:03,140 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:28:03,141 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:28:03,161 INFO spark.SparkContext: Starting job: countByKey at HoodieJavaPairRDD.java:105 | |
2023-05-17 16:28:03,163 INFO scheduler.DAGScheduler: Registering RDD 66 (countByKey at HoodieJavaPairRDD.java:105) as input to shuffle 10 | |
2023-05-17 16:28:03,163 INFO scheduler.DAGScheduler: Got job 16 (countByKey at HoodieJavaPairRDD.java:105) with 1 output partitions | |
2023-05-17 16:28:03,163 INFO scheduler.DAGScheduler: Final stage: ResultStage 37 (countByKey at HoodieJavaPairRDD.java:105) | |
2023-05-17 16:28:03,163 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 36) | |
2023-05-17 16:28:03,163 INFO scheduler.DAGScheduler: Missing parents: List(ShuffleMapStage 36) | |
2023-05-17 16:28:03,164 INFO scheduler.DAGScheduler: Submitting ShuffleMapStage 36 (MapPartitionsRDD[66] at countByKey at HoodieJavaPairRDD.java:105), which has no missing parents | |
2023-05-17 16:28:03,167 INFO memory.MemoryStore: Block broadcast_19 stored as values in memory (estimated size 9.5 KiB, free 363.3 MiB) | |
2023-05-17 16:28:03,169 INFO memory.MemoryStore: Block broadcast_19_piece0 stored as bytes in memory (estimated size 5.2 KiB, free 363.3 MiB) | |
2023-05-17 16:28:03,169 INFO storage.BlockManagerInfo: Added broadcast_19_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 5.2 KiB, free: 365.6 MiB) | |
2023-05-17 16:28:03,170 INFO spark.SparkContext: Created broadcast 19 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:28:03,171 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ShuffleMapStage 36 (MapPartitionsRDD[66] at countByKey at HoodieJavaPairRDD.java:105) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:28:03,171 INFO scheduler.TaskSchedulerImpl: Adding task set 36.0 with 1 tasks resource profile 0 | |
2023-05-17 16:28:03,174 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 36.0 (TID 27) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4687 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:03,175 INFO executor.Executor: Running task 0.0 in stage 36.0 (TID 27) | |
2023-05-17 16:28:03,183 INFO memory.MemoryStore: Block rdd_64_0 stored as values in memory (estimated size 380.0 B, free 363.3 MiB) | |
2023-05-17 16:28:03,184 INFO storage.BlockManagerInfo: Added rdd_64_0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 380.0 B, free: 365.6 MiB) | |
2023-05-17 16:28:03,192 INFO executor.Executor: Finished task 0.0 in stage 36.0 (TID 27). 1115 bytes result sent to driver | |
2023-05-17 16:28:03,193 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 36.0 (TID 27) in 21 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:28:03,193 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 36.0, whose tasks have all completed, from pool | |
2023-05-17 16:28:03,194 INFO scheduler.DAGScheduler: ShuffleMapStage 36 (countByKey at HoodieJavaPairRDD.java:105) finished in 0.029 s | |
2023-05-17 16:28:03,194 INFO scheduler.DAGScheduler: looking for newly runnable stages | |
2023-05-17 16:28:03,194 INFO scheduler.DAGScheduler: running: Set() | |
2023-05-17 16:28:03,194 INFO scheduler.DAGScheduler: waiting: Set(ResultStage 37) | |
2023-05-17 16:28:03,195 INFO scheduler.DAGScheduler: failed: Set() | |
2023-05-17 16:28:03,195 INFO scheduler.DAGScheduler: Submitting ResultStage 37 (ShuffledRDD[67] at countByKey at HoodieJavaPairRDD.java:105), which has no missing parents | |
2023-05-17 16:28:03,196 INFO memory.MemoryStore: Block broadcast_20 stored as values in memory (estimated size 4.6 KiB, free 363.3 MiB) | |
2023-05-17 16:28:03,198 INFO memory.MemoryStore: Block broadcast_20_piece0 stored as bytes in memory (estimated size 2.6 KiB, free 363.3 MiB) | |
2023-05-17 16:28:03,198 INFO storage.BlockManagerInfo: Added broadcast_20_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 2.6 KiB, free: 365.6 MiB) | |
2023-05-17 16:28:03,199 INFO spark.SparkContext: Created broadcast 20 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:28:03,201 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 37 (ShuffledRDD[67] at countByKey at HoodieJavaPairRDD.java:105) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:28:03,201 INFO scheduler.TaskSchedulerImpl: Adding task set 37.0 with 1 tasks resource profile 0 | |
2023-05-17 16:28:03,202 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 37.0 (TID 28) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, NODE_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:03,204 INFO executor.Executor: Running task 0.0 in stage 37.0 (TID 28) | |
2023-05-17 16:28:03,209 INFO storage.ShuffleBlockFetcherIterator: Getting 1 (117.0 B) non-empty blocks including 1 (117.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:28:03,209 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:28:03,218 INFO executor.Executor: Finished task 0.0 in stage 37.0 (TID 28). 1303 bytes result sent to driver | |
2023-05-17 16:28:03,219 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 37.0 (TID 28) in 17 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:28:03,219 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 37.0, whose tasks have all completed, from pool | |
2023-05-17 16:28:03,220 INFO scheduler.DAGScheduler: ResultStage 37 (countByKey at HoodieJavaPairRDD.java:105) finished in 0.025 s | |
2023-05-17 16:28:03,220 INFO scheduler.DAGScheduler: Job 16 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:28:03,220 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 37: Stage finished | |
2023-05-17 16:28:03,220 INFO scheduler.DAGScheduler: Job 16 finished: countByKey at HoodieJavaPairRDD.java:105, took 0.058939 s | |
2023-05-17 16:28:03,221 INFO commit.BaseSparkCommitActionExecutor: Input workload profile :WorkloadProfile {globalStat=WorkloadStat {numInserts=0, numUpdates=2}, InputPartitionStat={files=WorkloadStat {numInserts=0, numUpdates=2}}, OutputPartitionStat={}, operationType=UPSERT_PREPPED} | |
2023-05-17 16:28:03,221 INFO commit.UpsertPartitioner: AvgRecordSize => 1024 | |
2023-05-17 16:28:03,340 INFO spark.SparkContext: Starting job: collectAsMap at UpsertPartitioner.java:279 | |
2023-05-17 16:28:03,341 INFO scheduler.DAGScheduler: Got job 17 (collectAsMap at UpsertPartitioner.java:279) with 1 output partitions | |
2023-05-17 16:28:03,341 INFO scheduler.DAGScheduler: Final stage: ResultStage 38 (collectAsMap at UpsertPartitioner.java:279) | |
2023-05-17 16:28:03,341 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:28:03,341 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:28:03,342 INFO scheduler.DAGScheduler: Submitting ResultStage 38 (MapPartitionsRDD[69] at mapToPair at UpsertPartitioner.java:278), which has no missing parents | |
2023-05-17 16:28:03,371 INFO memory.MemoryStore: Block broadcast_21 stored as values in memory (estimated size 431.5 KiB, free 362.9 MiB) | |
2023-05-17 16:28:03,374 INFO memory.MemoryStore: Block broadcast_21_piece0 stored as bytes in memory (estimated size 161.5 KiB, free 362.7 MiB) | |
2023-05-17 16:28:03,374 INFO storage.BlockManagerInfo: Added broadcast_21_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 161.5 KiB, free: 365.4 MiB) | |
2023-05-17 16:28:03,375 INFO spark.SparkContext: Created broadcast 21 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:28:03,375 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 38 (MapPartitionsRDD[69] at mapToPair at UpsertPartitioner.java:278) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:28:03,375 INFO scheduler.TaskSchedulerImpl: Adding task set 38.0 with 1 tasks resource profile 0 | |
2023-05-17 16:28:03,376 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 38.0 (TID 29) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4339 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:03,376 INFO executor.Executor: Running task 0.0 in stage 38.0 (TID 29) | |
2023-05-17 16:28:03,402 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:28:03,403 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:28:03,403 INFO view.FileSystemViewManager: Creating InMemory based view for basePath /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:03,404 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:28:03,405 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:28:03,406 INFO view.AbstractTableFileSystemView: Building file system view for partition (files) | |
2023-05-17 16:28:03,408 INFO view.AbstractTableFileSystemView: addFilesToView: NumFiles=1, NumFileGroups=1, FileGroupsCreationTime=1, StoreTimeTaken=0 | |
2023-05-17 16:28:03,414 INFO executor.Executor: Finished task 0.0 in stage 38.0 (TID 29). 829 bytes result sent to driver | |
2023-05-17 16:28:03,415 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 38.0 (TID 29) in 39 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:28:03,415 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 38.0, whose tasks have all completed, from pool | |
2023-05-17 16:28:03,415 INFO scheduler.DAGScheduler: ResultStage 38 (collectAsMap at UpsertPartitioner.java:279) finished in 0.073 s | |
2023-05-17 16:28:03,415 INFO scheduler.DAGScheduler: Job 17 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:28:03,415 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 38: Stage finished | |
2023-05-17 16:28:03,415 INFO scheduler.DAGScheduler: Job 17 finished: collectAsMap at UpsertPartitioner.java:279, took 0.074955 s | |
2023-05-17 16:28:03,416 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:28:03,417 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:28:03,417 INFO commit.UpsertPartitioner: Total Buckets :1, buckets info => {0=BucketInfo {bucketType=UPDATE, fileIdPrefix=files-0000, partitionPath=files}}, | |
Partition to insert buckets => {}, | |
UpdateLocations mapped to buckets =>{files-0000=0} | |
2023-05-17 16:28:03,417 INFO timeline.HoodieActiveTimeline: Checking for file exists ?/tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/20230517162755867.deltacommit.requested | |
2023-05-17 16:28:03,429 INFO util.FileIOUtils: Created a new file in meta path: /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/20230517162755867.deltacommit.inflight | |
2023-05-17 16:28:03,842 INFO timeline.HoodieActiveTimeline: Create new file for toInstant ?/tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/20230517162755867.deltacommit.inflight | |
2023-05-17 16:28:03,884 INFO commit.BaseSparkCommitActionExecutor: no validators configured. | |
2023-05-17 16:28:03,884 INFO commit.BaseCommitActionExecutor: Auto commit enabled: Committing 20230517162755867 | |
2023-05-17 16:28:03,929 INFO storage.BlockManagerInfo: Removed broadcast_19_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 5.2 KiB, free: 365.4 MiB) | |
2023-05-17 16:28:03,938 INFO storage.BlockManagerInfo: Removed broadcast_18_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 48.2 KiB, free: 365.4 MiB) | |
2023-05-17 16:28:03,947 INFO storage.BlockManagerInfo: Removed broadcast_16_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 216.7 KiB, free: 365.7 MiB) | |
2023-05-17 16:28:03,957 INFO storage.BlockManagerInfo: Removed broadcast_21_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 161.5 KiB, free: 365.8 MiB) | |
2023-05-17 16:28:03,963 INFO storage.BlockManagerInfo: Removed broadcast_20_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 2.6 KiB, free: 365.8 MiB) | |
2023-05-17 16:28:03,989 INFO storage.BlockManagerInfo: Removed broadcast_17_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 216.8 KiB, free: 366.0 MiB) | |
2023-05-17 16:28:04,006 INFO spark.SparkContext: Starting job: collect at HoodieJavaRDD.java:163 | |
2023-05-17 16:28:04,007 INFO scheduler.DAGScheduler: Registering RDD 70 (mapToPair at HoodieJavaRDD.java:135) as input to shuffle 11 | |
2023-05-17 16:28:04,008 INFO scheduler.DAGScheduler: Got job 18 (collect at HoodieJavaRDD.java:163) with 1 output partitions | |
2023-05-17 16:28:04,008 INFO scheduler.DAGScheduler: Final stage: ResultStage 40 (collect at HoodieJavaRDD.java:163) | |
2023-05-17 16:28:04,008 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 39) | |
2023-05-17 16:28:04,008 INFO scheduler.DAGScheduler: Missing parents: List(ShuffleMapStage 39) | |
2023-05-17 16:28:04,009 INFO scheduler.DAGScheduler: Submitting ShuffleMapStage 39 (MapPartitionsRDD[70] at mapToPair at HoodieJavaRDD.java:135), which has no missing parents | |
2023-05-17 16:28:04,041 INFO memory.MemoryStore: Block broadcast_22 stored as values in memory (estimated size 436.4 KiB, free 364.6 MiB) | |
2023-05-17 16:28:04,044 INFO memory.MemoryStore: Block broadcast_22_piece0 stored as bytes in memory (estimated size 163.1 KiB, free 364.5 MiB) | |
2023-05-17 16:28:04,044 INFO storage.BlockManagerInfo: Added broadcast_22_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 163.1 KiB, free: 365.9 MiB) | |
2023-05-17 16:28:04,045 INFO spark.SparkContext: Created broadcast 22 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:28:04,045 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ShuffleMapStage 39 (MapPartitionsRDD[70] at mapToPair at HoodieJavaRDD.java:135) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:28:04,045 INFO scheduler.TaskSchedulerImpl: Adding task set 39.0 with 1 tasks resource profile 0 | |
2023-05-17 16:28:04,047 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 39.0 (TID 30) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4687 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:04,047 INFO executor.Executor: Running task 0.0 in stage 39.0 (TID 30) | |
2023-05-17 16:28:04,071 INFO storage.BlockManager: Found block rdd_64_0 locally | |
2023-05-17 16:28:04,076 INFO executor.Executor: Finished task 0.0 in stage 39.0 (TID 30). 1072 bytes result sent to driver | |
2023-05-17 16:28:04,077 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 39.0 (TID 30) in 31 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:28:04,077 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 39.0, whose tasks have all completed, from pool | |
2023-05-17 16:28:04,078 INFO scheduler.DAGScheduler: ShuffleMapStage 39 (mapToPair at HoodieJavaRDD.java:135) finished in 0.068 s | |
2023-05-17 16:28:04,078 INFO scheduler.DAGScheduler: looking for newly runnable stages | |
2023-05-17 16:28:04,078 INFO scheduler.DAGScheduler: running: Set() | |
2023-05-17 16:28:04,080 INFO scheduler.DAGScheduler: waiting: Set(ResultStage 40) | |
2023-05-17 16:28:04,080 INFO scheduler.DAGScheduler: failed: Set() | |
2023-05-17 16:28:04,081 INFO scheduler.DAGScheduler: Submitting ResultStage 40 (MapPartitionsRDD[75] at map at HoodieJavaRDD.java:111), which has no missing parents | |
2023-05-17 16:28:04,137 INFO memory.MemoryStore: Block broadcast_23 stored as values in memory (estimated size 566.5 KiB, free 363.9 MiB) | |
2023-05-17 16:28:04,141 INFO memory.MemoryStore: Block broadcast_23_piece0 stored as bytes in memory (estimated size 214.1 KiB, free 363.7 MiB) | |
2023-05-17 16:28:04,141 INFO storage.BlockManagerInfo: Added broadcast_23_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 214.1 KiB, free: 365.7 MiB) | |
2023-05-17 16:28:04,142 INFO spark.SparkContext: Created broadcast 23 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:28:04,142 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 40 (MapPartitionsRDD[75] at map at HoodieJavaRDD.java:111) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:28:04,142 INFO scheduler.TaskSchedulerImpl: Adding task set 40.0 with 1 tasks resource profile 0 | |
2023-05-17 16:28:04,143 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 40.0 (TID 31) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, NODE_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:04,144 INFO executor.Executor: Running task 0.0 in stage 40.0 (TID 31) | |
2023-05-17 16:28:04,174 INFO storage.ShuffleBlockFetcherIterator: Getting 1 (334.0 B) non-empty blocks including 1 (334.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:28:04,174 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:28:04,190 INFO deltacommit.BaseSparkDeltaCommitActionExecutor: Merging updates for commit 20230517162755867 for file files-0000 | |
2023-05-17 16:28:04,202 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:28:04,202 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:28:04,202 INFO view.FileSystemViewManager: Creating InMemory based view for basePath /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:04,203 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:28:04,203 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:28:04,203 INFO view.AbstractTableFileSystemView: Building file system view for partition (files) | |
2023-05-17 16:28:04,205 INFO view.AbstractTableFileSystemView: addFilesToView: NumFiles=1, NumFileGroups=1, FileGroupsCreationTime=1, StoreTimeTaken=0 | |
# WARNING: Unable to attach Serviceability Agent. Unable to attach even with module exceptions: [org.apache.hudi.org.openjdk.jol.vm.sa.SASupportException: Sense failed., org.apache.hudi.org.openjdk.jol.vm.sa.SASupportException: Sense failed., org.apache.hudi.org.openjdk.jol.vm.sa.SASupportException: Sense failed.] | |
2023-05-17 16:28:05,280 INFO marker.DirectWriteMarkers: Creating Marker Path=/tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/.temp/20230517162755867/files/files-0000_0-40-31_00000000000000.hfile.marker.APPEND | |
2023-05-17 16:28:05,285 INFO marker.DirectWriteMarkers: [direct] Created marker file /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/.temp/20230517162755867/files/files-0000_0-40-31_00000000000000.hfile.marker.APPEND in 7 ms | |
2023-05-17 16:28:05,286 INFO log.HoodieLogFormat$WriterBuilder: Building HoodieLogFormat Writer | |
2023-05-17 16:28:05,287 INFO log.HoodieLogFormat$WriterBuilder: HoodieLogFile on path /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0 | |
2023-05-17 16:28:05,366 INFO log.HoodieLogFormatWriter: HoodieLogFile{pathStr='/tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0', fileLen=124} exists. Appending to existing file | |
2023-05-17 16:28:05,591 INFO impl.MetricsConfig: Loaded properties from hadoop-metrics2.properties | |
2023-05-17 16:28:05,604 INFO impl.MetricsSystemImpl: Scheduled Metric snapshot period at 300 second(s). | |
2023-05-17 16:28:05,604 INFO impl.MetricsSystemImpl: HBase metrics system started | |
2023-05-17 16:28:05,626 INFO metrics.MetricRegistries: Loaded MetricRegistries class org.apache.hudi.org.apache.hadoop.hbase.metrics.impl.MetricRegistriesImpl | |
2023-05-17 16:28:05,664 INFO compress.CodecPool: Got brand-new compressor [.gz] | |
2023-05-17 16:28:05,667 INFO compress.CodecPool: Got brand-new compressor [.gz] | |
2023-05-17 16:28:05,801 INFO storage.BlockManagerInfo: Removed broadcast_22_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 163.1 KiB, free: 365.8 MiB) | |
2023-05-17 16:28:05,841 INFO io.HoodieAppendHandle: AppendHandle for partitionPath files filePath files/.files-0000_00000000000000.log.1_0-0-0, took 1641 ms. | |
2023-05-17 16:28:06,253 INFO memory.MemoryStore: Block rdd_74_0 stored as values in memory (estimated size 381.0 B, free 364.3 MiB) | |
2023-05-17 16:28:06,254 INFO storage.BlockManagerInfo: Added rdd_74_0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 381.0 B, free: 365.8 MiB) | |
2023-05-17 16:28:06,260 INFO executor.Executor: Finished task 0.0 in stage 40.0 (TID 31). 1558 bytes result sent to driver | |
2023-05-17 16:28:06,261 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 40.0 (TID 31) in 2118 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:28:06,261 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 40.0, whose tasks have all completed, from pool | |
2023-05-17 16:28:06,261 INFO scheduler.DAGScheduler: ResultStage 40 (collect at HoodieJavaRDD.java:163) finished in 2.179 s | |
2023-05-17 16:28:06,262 INFO scheduler.DAGScheduler: Job 18 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:28:06,262 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 40: Stage finished | |
2023-05-17 16:28:06,262 INFO scheduler.DAGScheduler: Job 18 finished: collect at HoodieJavaRDD.java:163, took 2.255412 s | |
2023-05-17 16:28:06,263 INFO util.CommitUtils: Creating metadata for UPSERT_PREPPED numWriteStats:1 numReplaceFileIds:0 | |
2023-05-17 16:28:06,349 INFO spark.SparkContext: Starting job: collect at HoodieJavaRDD.java:163 | |
2023-05-17 16:28:06,350 INFO scheduler.DAGScheduler: Got job 19 (collect at HoodieJavaRDD.java:163) with 1 output partitions | |
2023-05-17 16:28:06,350 INFO scheduler.DAGScheduler: Final stage: ResultStage 42 (collect at HoodieJavaRDD.java:163) | |
2023-05-17 16:28:06,350 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 41) | |
2023-05-17 16:28:06,350 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:28:06,350 INFO scheduler.DAGScheduler: Submitting ResultStage 42 (MapPartitionsRDD[76] at map at HoodieJavaRDD.java:111), which has no missing parents | |
2023-05-17 16:28:06,386 INFO memory.MemoryStore: Block broadcast_24 stored as values in memory (estimated size 566.5 KiB, free 363.7 MiB) | |
2023-05-17 16:28:06,389 INFO memory.MemoryStore: Block broadcast_24_piece0 stored as bytes in memory (estimated size 214.1 KiB, free 363.5 MiB) | |
2023-05-17 16:28:06,389 INFO storage.BlockManagerInfo: Added broadcast_24_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 214.1 KiB, free: 365.6 MiB) | |
2023-05-17 16:28:06,390 INFO spark.SparkContext: Created broadcast 24 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:28:06,390 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 42 (MapPartitionsRDD[76] at map at HoodieJavaRDD.java:111) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:28:06,390 INFO scheduler.TaskSchedulerImpl: Adding task set 42.0 with 1 tasks resource profile 0 | |
2023-05-17 16:28:06,391 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 42.0 (TID 32) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:06,391 INFO executor.Executor: Running task 0.0 in stage 42.0 (TID 32) | |
2023-05-17 16:28:06,416 INFO storage.BlockManager: Found block rdd_74_0 locally | |
2023-05-17 16:28:06,417 INFO executor.Executor: Finished task 0.0 in stage 42.0 (TID 32). 1128 bytes result sent to driver | |
2023-05-17 16:28:06,418 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 42.0 (TID 32) in 27 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:28:06,418 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 42.0, whose tasks have all completed, from pool | |
2023-05-17 16:28:06,418 INFO scheduler.DAGScheduler: ResultStage 42 (collect at HoodieJavaRDD.java:163) finished in 0.067 s | |
2023-05-17 16:28:06,418 INFO scheduler.DAGScheduler: Job 19 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:28:06,418 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 42: Stage finished | |
2023-05-17 16:28:06,419 INFO scheduler.DAGScheduler: Job 19 finished: collect at HoodieJavaRDD.java:163, took 0.069841 s | |
2023-05-17 16:28:06,419 INFO commit.BaseSparkCommitActionExecutor: Committing 20230517162755867, action Type deltacommit, operation Type UPSERT_PREPPED | |
2023-05-17 16:28:06,470 INFO spark.SparkContext: Starting job: collect at HoodieSparkEngineContext.java:137 | |
2023-05-17 16:28:06,470 INFO scheduler.DAGScheduler: Got job 20 (collect at HoodieSparkEngineContext.java:137) with 1 output partitions | |
2023-05-17 16:28:06,471 INFO scheduler.DAGScheduler: Final stage: ResultStage 43 (collect at HoodieSparkEngineContext.java:137) | |
2023-05-17 16:28:06,471 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:28:06,471 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:28:06,471 INFO scheduler.DAGScheduler: Submitting ResultStage 43 (MapPartitionsRDD[78] at flatMap at HoodieSparkEngineContext.java:137), which has no missing parents | |
2023-05-17 16:28:06,481 INFO memory.MemoryStore: Block broadcast_25 stored as values in memory (estimated size 128.1 KiB, free 363.4 MiB) | |
2023-05-17 16:28:06,483 INFO memory.MemoryStore: Block broadcast_25_piece0 stored as bytes in memory (estimated size 48.2 KiB, free 363.4 MiB) | |
2023-05-17 16:28:06,483 INFO storage.BlockManagerInfo: Added broadcast_25_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 48.2 KiB, free: 365.6 MiB) | |
2023-05-17 16:28:06,483 INFO spark.SparkContext: Created broadcast 25 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:28:06,484 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 43 (MapPartitionsRDD[78] at flatMap at HoodieSparkEngineContext.java:137) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:28:06,484 INFO scheduler.TaskSchedulerImpl: Adding task set 43.0 with 1 tasks resource profile 0 | |
2023-05-17 16:28:06,485 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 43.0 (TID 33) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4489 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:06,485 INFO executor.Executor: Running task 0.0 in stage 43.0 (TID 33) | |
2023-05-17 16:28:06,493 INFO executor.Executor: Finished task 0.0 in stage 43.0 (TID 33). 797 bytes result sent to driver | |
2023-05-17 16:28:06,493 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 43.0 (TID 33) in 8 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:28:06,494 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 43.0, whose tasks have all completed, from pool | |
2023-05-17 16:28:06,494 INFO scheduler.DAGScheduler: ResultStage 43 (collect at HoodieSparkEngineContext.java:137) finished in 0.023 s | |
2023-05-17 16:28:06,494 INFO scheduler.DAGScheduler: Job 20 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:28:06,494 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 43: Stage finished | |
2023-05-17 16:28:06,495 INFO scheduler.DAGScheduler: Job 20 finished: collect at HoodieSparkEngineContext.java:137, took 0.024611 s | |
2023-05-17 16:28:06,504 INFO timeline.HoodieActiveTimeline: Marking instant complete [==>20230517162755867__deltacommit__INFLIGHT] | |
2023-05-17 16:28:06,504 INFO timeline.HoodieActiveTimeline: Checking for file exists ?/tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/20230517162755867.deltacommit.inflight | |
2023-05-17 16:28:06,925 INFO timeline.HoodieActiveTimeline: Create new file for toInstant ?/tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/20230517162755867.deltacommit | |
2023-05-17 16:28:06,925 INFO timeline.HoodieActiveTimeline: Completed [==>20230517162755867__deltacommit__INFLIGHT] | |
2023-05-17 16:28:06,925 INFO commit.BaseSparkCommitActionExecutor: Committed 20230517162755867 | |
2023-05-17 16:28:06,963 INFO spark.SparkContext: Starting job: collectAsMap at HoodieSparkEngineContext.java:151 | |
2023-05-17 16:28:06,964 INFO scheduler.DAGScheduler: Got job 21 (collectAsMap at HoodieSparkEngineContext.java:151) with 1 output partitions | |
2023-05-17 16:28:06,964 INFO scheduler.DAGScheduler: Final stage: ResultStage 44 (collectAsMap at HoodieSparkEngineContext.java:151) | |
2023-05-17 16:28:06,964 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:28:06,964 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:28:06,964 INFO scheduler.DAGScheduler: Submitting ResultStage 44 (MapPartitionsRDD[80] at mapToPair at HoodieSparkEngineContext.java:148), which has no missing parents | |
2023-05-17 16:28:06,974 INFO memory.MemoryStore: Block broadcast_26 stored as values in memory (estimated size 128.2 KiB, free 363.2 MiB) | |
2023-05-17 16:28:06,976 INFO memory.MemoryStore: Block broadcast_26_piece0 stored as bytes in memory (estimated size 48.3 KiB, free 363.2 MiB) | |
2023-05-17 16:28:06,976 INFO storage.BlockManagerInfo: Added broadcast_26_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 48.3 KiB, free: 365.5 MiB) | |
2023-05-17 16:28:06,976 INFO spark.SparkContext: Created broadcast 26 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:28:06,977 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 44 (MapPartitionsRDD[80] at mapToPair at HoodieSparkEngineContext.java:148) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:28:06,977 INFO scheduler.TaskSchedulerImpl: Adding task set 44.0 with 1 tasks resource profile 0 | |
2023-05-17 16:28:06,978 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 44.0 (TID 34) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4489 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:06,978 INFO executor.Executor: Running task 0.0 in stage 44.0 (TID 34) | |
2023-05-17 16:28:06,986 INFO executor.Executor: Finished task 0.0 in stage 44.0 (TID 34). 958 bytes result sent to driver | |
2023-05-17 16:28:06,987 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 44.0 (TID 34) in 10 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:28:06,987 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 44.0, whose tasks have all completed, from pool | |
2023-05-17 16:28:06,987 INFO scheduler.DAGScheduler: ResultStage 44 (collectAsMap at HoodieSparkEngineContext.java:151) finished in 0.022 s | |
2023-05-17 16:28:06,987 INFO scheduler.DAGScheduler: Job 21 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:28:06,988 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 44: Stage finished | |
2023-05-17 16:28:06,988 INFO scheduler.DAGScheduler: Job 21 finished: collectAsMap at HoodieSparkEngineContext.java:151, took 0.024484 s | |
2023-05-17 16:28:06,990 INFO fs.FSUtils: Removed directory at /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/.temp/20230517162755867 | |
2023-05-17 16:28:06,991 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:06,992 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:28:06,995 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:06,995 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:06,996 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__deltacommit__COMPLETED]} | |
2023-05-17 16:28:06,996 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:28:06,996 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:28:07,070 INFO spark.SparkContext: Starting job: collect at SparkHoodieBackedTableMetadataWriter.java:185 | |
2023-05-17 16:28:07,071 INFO scheduler.DAGScheduler: Got job 22 (collect at SparkHoodieBackedTableMetadataWriter.java:185) with 1 output partitions | |
2023-05-17 16:28:07,071 INFO scheduler.DAGScheduler: Final stage: ResultStage 46 (collect at SparkHoodieBackedTableMetadataWriter.java:185) | |
2023-05-17 16:28:07,071 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 45) | |
2023-05-17 16:28:07,072 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:28:07,072 INFO scheduler.DAGScheduler: Submitting ResultStage 46 (MapPartitionsRDD[74] at flatMap at BaseSparkCommitActionExecutor.java:255), which has no missing parents | |
2023-05-17 16:28:07,108 INFO memory.MemoryStore: Block broadcast_27 stored as values in memory (estimated size 566.1 KiB, free 362.6 MiB) | |
2023-05-17 16:28:07,111 INFO memory.MemoryStore: Block broadcast_27_piece0 stored as bytes in memory (estimated size 213.9 KiB, free 362.4 MiB) | |
2023-05-17 16:28:07,112 INFO storage.BlockManagerInfo: Added broadcast_27_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 213.9 KiB, free: 365.3 MiB) | |
2023-05-17 16:28:07,112 INFO spark.SparkContext: Created broadcast 27 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:28:07,112 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 46 (MapPartitionsRDD[74] at flatMap at BaseSparkCommitActionExecutor.java:255) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:28:07,113 INFO scheduler.TaskSchedulerImpl: Adding task set 46.0 with 1 tasks resource profile 0 | |
2023-05-17 16:28:07,114 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 46.0 (TID 35) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:07,114 INFO executor.Executor: Running task 0.0 in stage 46.0 (TID 35) | |
2023-05-17 16:28:07,130 INFO storage.BlockManager: Found block rdd_74_0 locally | |
2023-05-17 16:28:07,131 INFO executor.Executor: Finished task 0.0 in stage 46.0 (TID 35). 1224 bytes result sent to driver | |
2023-05-17 16:28:07,132 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 46.0 (TID 35) in 19 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:28:07,132 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 46.0, whose tasks have all completed, from pool | |
2023-05-17 16:28:07,132 INFO scheduler.DAGScheduler: ResultStage 46 (collect at SparkHoodieBackedTableMetadataWriter.java:185) finished in 0.059 s | |
2023-05-17 16:28:07,133 INFO scheduler.DAGScheduler: Job 22 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:28:07,133 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 46: Stage finished | |
2023-05-17 16:28:07,133 INFO scheduler.DAGScheduler: Job 22 finished: collect at SparkHoodieBackedTableMetadataWriter.java:185, took 0.062373 s | |
2023-05-17 16:28:07,135 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__deltacommit__COMPLETED]} | |
2023-05-17 16:28:07,136 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__deltacommit__COMPLETED]} | |
2023-05-17 16:28:07,137 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:07,139 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:28:07,141 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:07,141 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:07,142 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__deltacommit__COMPLETED]} | |
2023-05-17 16:28:07,143 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:28:07,143 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:28:07,143 INFO client.BaseHoodieWriteClient: Cleaner started | |
2023-05-17 16:28:07,143 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:07,144 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:28:07,146 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:07,146 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:07,147 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__deltacommit__COMPLETED]} | |
2023-05-17 16:28:07,147 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:28:07,147 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:28:07,147 INFO client.BaseHoodieWriteClient: Scheduling cleaning at instant time :20230517162755867002 | |
2023-05-17 16:28:07,154 INFO view.FileSystemViewManager: Creating InMemory based view for basePath /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:07,154 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:28:07,154 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:28:07,157 INFO clean.CleanPlanner: No earliest commit to retain. No need to scan partitions !! | |
2023-05-17 16:28:07,157 INFO clean.CleanPlanner: Nothing to clean here. It is already clean | |
2023-05-17 16:28:07,168 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__deltacommit__COMPLETED]} | |
2023-05-17 16:28:07,170 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:07,171 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:28:07,173 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:07,173 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:07,174 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__deltacommit__COMPLETED]} | |
2023-05-17 16:28:07,174 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:28:07,175 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:28:07,176 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:07,177 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:28:07,179 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:07,179 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:07,180 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__deltacommit__COMPLETED]} | |
2023-05-17 16:28:07,181 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:28:07,181 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:28:07,193 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__deltacommit__COMPLETED]} | |
2023-05-17 16:28:07,194 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:07,195 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:28:07,197 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:07,197 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517162755867__commit__INFLIGHT]} | |
2023-05-17 16:28:07,200 INFO client.HoodieTimelineArchiver: No Instants to archive | |
2023-05-17 16:28:07,200 INFO timeline.HoodieActiveTimeline: Marking instant complete [==>20230517162755867__commit__INFLIGHT] | |
2023-05-17 16:28:07,200 INFO timeline.HoodieActiveTimeline: Checking for file exists ?/tmp/deltastreamertest/stocks20230517t162744/.hoodie/20230517162755867.inflight | |
2023-05-17 16:28:07,615 INFO timeline.HoodieActiveTimeline: Create new file for toInstant ?/tmp/deltastreamertest/stocks20230517t162744/.hoodie/20230517162755867.commit | |
2023-05-17 16:28:07,615 INFO timeline.HoodieActiveTimeline: Completed [==>20230517162755867__commit__INFLIGHT] | |
2023-05-17 16:28:07,615 WARN marker.WriteMarkersFactory: Timeline-server-based markers are not supported for HDFS: base path /tmp/deltastreamertest/stocks20230517t162744. Falling back to direct markers. | |
2023-05-17 16:28:07,649 INFO spark.SparkContext: Starting job: collectAsMap at HoodieSparkEngineContext.java:151 | |
2023-05-17 16:28:07,650 INFO scheduler.DAGScheduler: Got job 23 (collectAsMap at HoodieSparkEngineContext.java:151) with 1 output partitions | |
2023-05-17 16:28:07,650 INFO scheduler.DAGScheduler: Final stage: ResultStage 47 (collectAsMap at HoodieSparkEngineContext.java:151) | |
2023-05-17 16:28:07,650 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:28:07,650 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:28:07,650 INFO scheduler.DAGScheduler: Submitting ResultStage 47 (MapPartitionsRDD[82] at mapToPair at HoodieSparkEngineContext.java:148), which has no missing parents | |
2023-05-17 16:28:07,660 INFO memory.MemoryStore: Block broadcast_28 stored as values in memory (estimated size 128.2 KiB, free 362.3 MiB) | |
2023-05-17 16:28:07,662 INFO memory.MemoryStore: Block broadcast_28_piece0 stored as bytes in memory (estimated size 48.3 KiB, free 362.3 MiB) | |
2023-05-17 16:28:07,662 INFO storage.BlockManagerInfo: Added broadcast_28_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:40491 (size: 48.3 KiB, free: 365.3 MiB) | |
2023-05-17 16:28:07,663 INFO spark.SparkContext: Created broadcast 28 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:28:07,663 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 47 (MapPartitionsRDD[82] at mapToPair at HoodieSparkEngineContext.java:148) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:28:07,663 INFO scheduler.TaskSchedulerImpl: Adding task set 47.0 with 1 tasks resource profile 0 | |
2023-05-17 16:28:07,664 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 47.0 (TID 36) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4471 bytes) taskResourceAssignments Map() | |
2023-05-17 16:28:07,664 INFO executor.Executor: Running task 0.0 in stage 47.0 (TID 36) | |
2023-05-17 16:28:07,674 INFO executor.Executor: Finished task 0.0 in stage 47.0 (TID 36). 940 bytes result sent to driver | |
2023-05-17 16:28:07,675 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 47.0 (TID 36) in 11 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:28:07,675 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 47.0, whose tasks have all completed, from pool | |
2023-05-17 16:28:07,676 INFO scheduler.DAGScheduler: ResultStage 47 (collectAsMap at HoodieSparkEngineContext.java:151) finished in 0.024 s | |
2023-05-17 16:28:07,676 INFO scheduler.DAGScheduler: Job 23 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:28:07,676 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 47: Stage finished | |
2023-05-17 16:28:07,676 INFO scheduler.DAGScheduler: Job 23 finished: collectAsMap at HoodieSparkEngineContext.java:151, took 0.026661 s | |
2023-05-17 16:28:07,678 INFO fs.FSUtils: Removed directory at /tmp/deltastreamertest/stocks20230517t162744/.hoodie/.temp/20230517162755867 | |
2023-05-17 16:28:07,678 INFO client.BaseHoodieWriteClient: Committed 20230517162755867 | |
2023-05-17 16:28:07,684 INFO rdd.MapPartitionsRDD: Removing RDD 74 from persistence list | |
2023-05-17 16:28:07,685 INFO rdd.MapPartitionsRDD: Removing RDD 53 from persistence list | |
2023-05-17 16:28:07,686 INFO rdd.UnionRDD: Removing RDD 64 from persistence list | |
2023-05-17 16:28:07,686 INFO rdd.MapPartitionsRDD: Removing RDD 43 from persistence list | |
2023-05-17 16:28:07,687 INFO client.BaseHoodieWriteClient: Start to clean synchronously. | |
2023-05-17 16:28:07,687 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:07,689 INFO storage.BlockManager: Removing RDD 74 | |
2023-05-17 16:28:07,689 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:28:07,691 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:07,691 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:07,693 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__commit__COMPLETED]} | |
2023-05-17 16:28:07,693 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:07,694 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:28:07,697 INFO storage.BlockManager: Removing RDD 53 | |
2023-05-17 16:28:07,698 INFO storage.BlockManager: Removing RDD 64 | |
2023-05-17 16:28:07,699 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:07,699 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:07,701 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:28:07,706 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:07,708 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__deltacommit__COMPLETED]} | |
2023-05-17 16:28:07,710 INFO view.AbstractTableFileSystemView: Took 1 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:28:07,711 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:28:07,711 INFO view.FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
2023-05-17 16:28:07,711 INFO view.FileSystemViewManager: Creating remote first table view | |
2023-05-17 16:28:07,711 INFO client.BaseHoodieWriteClient: Cleaner started | |
2023-05-17 16:28:07,711 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:07,712 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:28:07,713 INFO storage.BlockManager: Removing RDD 43 | |
2023-05-17 16:28:07,715 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:07,715 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:07,716 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__commit__COMPLETED]} | |
2023-05-17 16:28:07,716 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:07,717 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:28:07,720 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:07,720 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:07,721 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:28:07,723 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:07,725 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__deltacommit__COMPLETED]} | |
2023-05-17 16:28:07,725 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:28:07,725 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:28:07,725 INFO view.FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
2023-05-17 16:28:07,725 INFO view.FileSystemViewManager: Creating remote first table view | |
2023-05-17 16:28:07,725 INFO client.BaseHoodieWriteClient: Scheduling cleaning at instant time :20230517162807687 | |
2023-05-17 16:28:07,726 INFO view.FileSystemViewManager: Creating remote view for basePath /tmp/deltastreamertest/stocks20230517t162744. Server=ip-172-31-19-77.us-east-2.compute.internal:38649, Timeout=300 | |
2023-05-17 16:28:07,726 INFO view.FileSystemViewManager: Creating InMemory based view for basePath /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:07,727 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:28:07,728 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:28:07,743 INFO view.RemoteHoodieTableFileSystemView: Sending request : (http://ip-172-31-19-77.us-east-2.compute.internal:38649/v1/hoodie/view/compactions/pending/?basepath=%2Ftmp%2Fdeltastreamertest%2Fstocks20230517t162744&lastinstantts=20230517162755867&timelinehash=4c2b0c6d12d0ca09af7ee9cc7a5490316d1b2d26791d741ac8c9419d50a7655e) | |
2023-05-17 16:28:08,081 INFO storage.BlockManagerInfo: Removed broadcast_27_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 213.9 KiB, free: 365.5 MiB) | |
2023-05-17 16:28:08,086 INFO storage.BlockManagerInfo: Removed broadcast_23_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 214.1 KiB, free: 365.7 MiB) | |
2023-05-17 16:28:08,091 INFO storage.BlockManagerInfo: Removed broadcast_26_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 48.3 KiB, free: 365.7 MiB) | |
2023-05-17 16:28:08,094 INFO storage.BlockManagerInfo: Removed broadcast_24_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 214.1 KiB, free: 365.9 MiB) | |
2023-05-17 16:28:08,095 INFO storage.BlockManager: Removing RDD 64 | |
2023-05-17 16:28:08,097 INFO storage.BlockManagerInfo: Removed broadcast_28_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 48.3 KiB, free: 366.0 MiB) | |
2023-05-17 16:28:08,100 INFO storage.BlockManagerInfo: Removed broadcast_25_piece0 on ip-172-31-19-77.us-east-2.compute.internal:40491 in memory (size: 48.2 KiB, free: 366.0 MiB) | |
2023-05-17 16:28:08,101 INFO storage.BlockManager: Removing RDD 74 | |
2023-05-17 16:28:08,244 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:08,247 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:28:08,249 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:08,249 INFO view.FileSystemViewManager: Creating InMemory based view for basePath /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:08,251 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__commit__COMPLETED]} | |
2023-05-17 16:28:08,252 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:08,253 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:28:08,255 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:08,255 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:08,257 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:28:08,259 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:08,260 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__deltacommit__COMPLETED]} | |
2023-05-17 16:28:08,260 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:28:08,261 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:28:08,261 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:28:08,262 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:28:08,332 INFO view.RemoteHoodieTableFileSystemView: Sending request : (http://ip-172-31-19-77.us-east-2.compute.internal:38649/v1/hoodie/view/logcompactions/pending/?basepath=%2Ftmp%2Fdeltastreamertest%2Fstocks20230517t162744&lastinstantts=20230517162755867&timelinehash=4c2b0c6d12d0ca09af7ee9cc7a5490316d1b2d26791d741ac8c9419d50a7655e) | |
2023-05-17 16:28:08,341 INFO clean.CleanPlanner: No earliest commit to retain. No need to scan partitions !! | |
2023-05-17 16:28:08,341 INFO clean.CleanPlanner: Nothing to clean here. It is already clean | |
2023-05-17 16:28:08,344 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__commit__COMPLETED]} | |
2023-05-17 16:28:08,346 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:08,348 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:28:08,350 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:08,351 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:08,353 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__commit__COMPLETED]} | |
2023-05-17 16:28:08,353 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:08,354 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:28:08,357 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:08,357 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:08,358 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:28:08,360 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:08,362 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__deltacommit__COMPLETED]} | |
2023-05-17 16:28:08,362 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:28:08,362 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:28:08,362 INFO view.FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
2023-05-17 16:28:08,362 INFO view.FileSystemViewManager: Creating remote first table view | |
2023-05-17 16:28:08,362 INFO client.BaseHoodieWriteClient: Start to archive synchronously. | |
2023-05-17 16:28:08,365 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__commit__COMPLETED]} | |
2023-05-17 16:28:08,365 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:08,366 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:28:08,368 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:08,368 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:08,369 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:28:08,372 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:08,373 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__deltacommit__COMPLETED]} | |
2023-05-17 16:28:08,374 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:28:08,374 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:28:08,374 INFO client.HoodieTimelineArchiver: Not archiving as there is no compaction yet on the metadata table | |
2023-05-17 16:28:08,375 INFO client.HoodieTimelineArchiver: No Instants to archive | |
2023-05-17 16:28:08,375 INFO view.FileSystemViewManager: Creating remote view for basePath /tmp/deltastreamertest/stocks20230517t162744. Server=ip-172-31-19-77.us-east-2.compute.internal:38649, Timeout=300 | |
2023-05-17 16:28:08,375 INFO view.FileSystemViewManager: Creating InMemory based view for basePath /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:08,375 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:28:08,377 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:28:08,378 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__commit__COMPLETED]} | |
2023-05-17 16:28:08,379 INFO view.RemoteHoodieTableFileSystemView: Sending request : (http://ip-172-31-19-77.us-east-2.compute.internal:38649/v1/hoodie/view/refresh/?basepath=%2Ftmp%2Fdeltastreamertest%2Fstocks20230517t162744&lastinstantts=20230517162755867&timelinehash=4c2b0c6d12d0ca09af7ee9cc7a5490316d1b2d26791d741ac8c9419d50a7655e) | |
2023-05-17 16:28:08,385 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__commit__COMPLETED]} | |
2023-05-17 16:28:08,386 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:28:08,387 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:28:08,388 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__commit__COMPLETED]} | |
2023-05-17 16:28:08,391 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__deltacommit__COMPLETED]} | |
2023-05-17 16:28:08,391 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:28:08,391 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:28:08,391 INFO deltastreamer.DeltaSync: Commit 20230517162755867 successful! | |
2023-05-17 16:28:08,494 INFO conf.HiveConf: Found configuration file file:/home/hadoop/spark-3.1.3-bin-hadoop3.2/conf/hive-site.xml | |
2023-05-17 16:28:08,688 WARN conf.HiveConf: HiveConf of name hive.server2.thrift.url does not exist | |
2023-05-17 16:28:08,697 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:08,699 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:28:08,706 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:08,706 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:08,709 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__commit__COMPLETED]} | |
2023-05-17 16:28:09,093 INFO hive.metastore: Trying to connect to metastore with URI thrift://ip-172-31-19-77.us-east-2.compute.internal:9083 | |
2023-05-17 16:28:09,109 INFO hive.metastore: Opened a connection to metastore, current connections: 1 | |
2023-05-17 16:28:09,126 INFO hive.metastore: Connected to metastore. | |
2023-05-17 16:28:09,198 INFO hive.HiveSyncTool: Syncing target hoodie table with hive table(default.stocks20230517t162744). Hive metastore URL from HiveConf:thrift://ip-172-31-19-77.us-east-2.compute.internal:9083). Hive metastore URL from HiveSyncConfig:null, basePath :/tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:09,198 INFO hive.HiveSyncTool: Trying to sync hoodie table stocks20230517t162744 with base path /tmp/deltastreamertest/stocks20230517t162744 of type COPY_ON_WRITE | |
2023-05-17 16:28:09,219 INFO table.TableSchemaResolver: Reading schema from /tmp/deltastreamertest/stocks20230517t162744/2018/08/31/2028dec8-f9de-44ba-937e-d7743d78ebaf-0_0-22-23_20230517162755867.parquet | |
2023-05-17 16:28:09,257 INFO hive.HiveSyncTool: Hive table stocks20230517t162744 is not found. Creating it with schema message stock_ticks { | |
optional binary _hoodie_commit_time (UTF8); | |
optional binary _hoodie_commit_seqno (UTF8); | |
optional binary _hoodie_record_key (UTF8); | |
optional binary _hoodie_partition_path (UTF8); | |
optional binary _hoodie_file_name (UTF8); | |
required int64 volume; | |
required binary ts (UTF8); | |
required binary symbol (UTF8); | |
required int32 year; | |
required binary month (UTF8); | |
required double high; | |
required double low; | |
required binary key (UTF8); | |
required binary date (UTF8); | |
required double close; | |
required double open; | |
required binary day (UTF8); | |
} | |
2023-05-17 16:28:09,342 INFO hive.HoodieHiveSyncClient: No comment difference of stocks20230517t162744 | |
2023-05-17 16:28:09,342 INFO hive.HiveSyncTool: Schema sync complete. Syncing partitions for stocks20230517t162744 | |
2023-05-17 16:28:09,342 INFO hive.HiveSyncTool: Last commit time synced was found to be null | |
2023-05-17 16:28:09,342 INFO hive.HiveSyncTool: Sync all partitions given the last commit time synced is empty or before the start of the active timeline. Listing all partitions in /tmp/deltastreamertest/stocks20230517t162744, file system: DFS[DFSClient[clientName=DFSClient_NONMAPREDUCE_1718171234_1, ugi=hadoop (auth:SIMPLE)]] | |
2023-05-17 16:28:09,355 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:09,357 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/hoodie.properties | |
2023-05-17 16:28:09,359 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t162744 | |
2023-05-17 16:28:09,359 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:09,362 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:28:09,363 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:09,365 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__deltacommit__COMPLETED]} | |
2023-05-17 16:28:09,365 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:28:09,365 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:28:09,365 INFO metadata.HoodieTableMetadataUtil: Loading latest merged file slices for metadata table partition files | |
2023-05-17 16:28:09,366 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:28:09,366 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:28:09,366 INFO view.AbstractTableFileSystemView: Building file system view for partition (files) | |
2023-05-17 16:28:09,367 INFO view.AbstractTableFileSystemView: addFilesToView: NumFiles=2, NumFileGroups=1, FileGroupsCreationTime=0, StoreTimeTaken=0 | |
2023-05-17 16:28:09,371 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__commit__COMPLETED]} | |
2023-05-17 16:28:09,389 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:09,390 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:28:09,394 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata | |
2023-05-17 16:28:09,406 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517162755867__deltacommit__COMPLETED]} | |
2023-05-17 16:28:09,417 INFO log.AbstractHoodieLogRecordReader: Scanning log file HoodieLogFile{pathStr='hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0', fileLen=-1} | |
2023-05-17 16:28:09,420 INFO log.AbstractHoodieLogRecordReader: Reading a delete block from file hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0 | |
2023-05-17 16:28:09,420 INFO log.AbstractHoodieLogRecordReader: Scanning log file HoodieLogFile{pathStr='hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0', fileLen=-1} | |
2023-05-17 16:28:09,421 INFO log.AbstractHoodieLogRecordReader: Reading a data block from file hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/deltastreamertest/stocks20230517t162744/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0 at instant 20230517162755867 | |
2023-05-17 16:28:09,421 INFO log.AbstractHoodieLogRecordReader: Merging the final data blocks | |
2023-05-17 16:28:09,421 INFO log.AbstractHoodieLogRecordReader: Number of remaining logblocks to merge 2 | |
2023-05-17 16:28:09,423 INFO log.AbstractHoodieLogRecordReader: Number of remaining logblocks to merge 1 | |
2023-05-17 16:28:09,527 INFO compress.CodecPool: Got brand-new decompressor [.gz] | |
2023-05-17 16:28:09,530 INFO compress.CodecPool: Got brand-new decompressor [.gz] | |
2023-05-17 16:28:09,530 INFO compress.CodecPool: Got brand-new decompressor [.gz] | |
2023-05-17 16:28:09,587 INFO compress.CodecPool: Got brand-new decompressor [.gz] | |
2023-05-17 16:28:09,599 INFO log.HoodieMergedLogRecordScanner: Number of log files scanned => 1 | |
2023-05-17 16:28:09,599 INFO log.HoodieMergedLogRecordScanner: MaxMemoryInBytes allowed for compaction => 1073741824 | |
2023-05-17 16:28:09,599 INFO log.HoodieMergedLogRecordScanner: Number of entries in MemoryBasedMap in ExternalSpillableMap => 2 | |
2023-05-17 16:28:09,599 INFO log.HoodieMergedLogRecordScanner: Total size in bytes of MemoryBasedMap in ExternalSpillableMap => 1248 | |
2023-05-17 16:28:09,599 INFO log.HoodieMergedLogRecordScanner: Number of entries in BitCaskDiskMap in ExternalSpillableMap => 0 | |
2023-05-17 16:28:09,599 INFO log.HoodieMergedLogRecordScanner: Size of file spilled to disk => 0 | |
2023-05-17 16:28:09,599 INFO metadata.HoodieBackedTableMetadata: Opened 1 metadata log files (dataset instant=20230517162755867, metadata instant=20230517162755867) in 229 ms | |
2023-05-17 16:28:09,606 INFO metadata.BaseTableMetadata: Listed partitions from metadata: #partitions=1 | |
2023-05-17 16:28:09,609 INFO hive.HiveSyncTool: New Partitions [2018/08/31] | |
2023-05-17 16:28:09,609 INFO ddl.HMSDDLExecutor: Adding partitions 1 to table stocks20230517t162744 | |
2023-05-17 16:28:09,664 INFO ddl.HMSDDLExecutor: HMSDDLExecutor add a batch partitions done: 1 | |
2023-05-17 16:28:09,693 INFO hive.HiveSyncTool: Sync complete for stocks20230517t162744 | |
2023-05-17 16:28:09,696 INFO hive.metastore: Closed a connection to metastore, current connections: 0 | |
2023-05-17 16:28:09,697 INFO deltastreamer.DeltaSync: Shutting down embedded timeline server | |
2023-05-17 16:28:09,697 INFO embedded.EmbeddedTimelineService: Closing Timeline server | |
2023-05-17 16:28:09,697 INFO service.TimelineService: Closing Timeline Service | |
2023-05-17 16:28:09,697 INFO javalin.Javalin: Stopping Javalin ... | |
2023-05-17 16:28:09,709 INFO javalin.Javalin: Javalin has stopped | |
2023-05-17 16:28:09,709 INFO service.TimelineService: Closed Timeline Service | |
2023-05-17 16:28:09,709 INFO embedded.EmbeddedTimelineService: Closed Timeline server | |
2023-05-17 16:28:09,711 INFO deltastreamer.HoodieDeltaStreamer: Shut down delta streamer | |
2023-05-17 16:28:09,718 INFO server.AbstractConnector: Stopped Spark@303a5119{HTTP/1.1, (http/1.1)}{0.0.0.0:8090} | |
2023-05-17 16:28:09,718 INFO ui.SparkUI: Stopped Spark web UI at http://ip-172-31-19-77.us-east-2.compute.internal:8090 | |
2023-05-17 16:28:09,730 INFO spark.MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped! | |
2023-05-17 16:28:09,743 INFO memory.MemoryStore: MemoryStore cleared | |
2023-05-17 16:28:09,745 INFO storage.BlockManager: BlockManager stopped | |
2023-05-17 16:28:09,748 INFO storage.BlockManagerMaster: BlockManagerMaster stopped | |
2023-05-17 16:28:09,753 INFO scheduler.OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped! | |
2023-05-17 16:28:09,757 INFO spark.SparkContext: Successfully stopped SparkContext | |
2023-05-17 16:28:09,759 INFO util.ShutdownHookManager: Shutdown hook called | |
2023-05-17 16:28:09,760 INFO util.ShutdownHookManager: Deleting directory /mnt/tmp/spark-a01000bf-741d-4cb7-9490-3b0c5ac456b2 | |
2023-05-17 16:28:09,762 INFO util.ShutdownHookManager: Deleting directory /mnt/tmp/spark-fbe3bc17-a1ea-4348-934c-c9844d1cd059 | |
======LOGS FAT BUNDLE======== | |
[hadoop@ip-172-31-19-77 spark-3.1.3-bin-hadoop3.2]$ bin/spark-submit --master local --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer \ | |
> /home/hadoop/v_3.1/hudi-utilities-bundle_2.12-0.13.0.jar \ | |
> --target-base-path /tmp/deltastreamertest/stocks${NOW} \ | |
> --target-table stocks${NOW} --table-type COPY_ON_WRITE --base-file-format PARQUET \ | |
> --source-class org.apache.hudi.utilities.sources.JsonDFSSource \ | |
> --source-ordering-field ts --payload-class org.apache.hudi.common.model.DefaultHoodieRecordPayload \ | |
> --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider \ | |
> --hoodie-conf hoodie.deltastreamer.schemaprovider.source.schema.file=/tmp/schema.avsc \ | |
> --hoodie-conf hoodie.deltastreamer.schemaprovider.target.schema.file=/tmp/schema.avsc \ | |
> --op UPSERT --enable-sync --spark-master yarn \ | |
> --hoodie-conf hoodie.deltastreamer.source.dfs.root=/tmp/source_parquet \ | |
> --hoodie-conf hoodie.datasource.write.recordkey.field=symbol \ | |
> --hoodie-conf hoodie.datasource.write.partitionpath.field=date --hoodie-conf hoodie.datasource.write.precombine.field=ts \ | |
> --hoodie-conf hoodie.datasource.write.keygenerator.type=SIMPLE --hoodie-conf hoodie.datasource.write.hive_style_partitioning=false \ | |
> --hoodie-conf hoodie.metadata.enable=true \ | |
> --hoodie-conf hoodie.datasource.hive_sync.mode=hms \ | |
> --hoodie-conf hoodie.datasource.hive_sync.skip_ro_suffix=true \ | |
> --hoodie-conf hoodie.datasource.hive_sync.ignore_exceptions=false \ | |
> --hoodie-conf hoodie.datasource.hive_sync.auto_create_database=true \ | |
> --hoodie-conf hoodie.datasource.hive_sync.database=default \ | |
> --hoodie-conf hoodie.datasource.hive_sync.partition_fields=date \ | |
> --hoodie-conf hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor \ | |
> --hoodie-conf hoodie.datasource.hive_sync.sync_as_datasource=true --hoodie-conf hoodie.datasource.hive_sync.sync_comment=true | |
SLF4J: Class path contains multiple SLF4J bindings. | |
SLF4J: Found binding in [jar:file:/home/hadoop/spark-3.1.3-bin-hadoop3.2/jars/slf4j-log4j12-1.7.30.jar!/org/slf4j/impl/StaticLoggerBinder.class] | |
SLF4J: Found binding in [jar:file:/usr/lib/hadoop/lib/slf4j-reload4j-1.7.36.jar!/org/slf4j/impl/StaticLoggerBinder.class] | |
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation. | |
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory] | |
2023-05-17 16:30:06,730 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable | |
2023-05-17 16:30:07,026 WARN deltastreamer.SchedulerConfGenerator: Job Scheduling Configs will not be in effect as spark.scheduler.mode is not set to FAIR at instantiation time. Continuing without scheduling configs | |
2023-05-17 16:30:07,105 INFO spark.SparkContext: Running Spark version 3.1.3 | |
2023-05-17 16:30:07,145 INFO resource.ResourceUtils: ============================================================== | |
2023-05-17 16:30:07,145 INFO resource.ResourceUtils: No custom resources configured for spark.driver. | |
2023-05-17 16:30:07,146 INFO resource.ResourceUtils: ============================================================== | |
2023-05-17 16:30:07,146 INFO spark.SparkContext: Submitted application: delta-streamer-stocks20230517t163003 | |
2023-05-17 16:30:07,167 INFO resource.ResourceProfile: Default ResourceProfile created, executor resources: Map(cores -> name: cores, amount: 1, script: , vendor: , memory -> name: memory, amount: 1024, script: , vendor: , offHeap -> name: offHeap, amount: 0, script: , vendor: ), task resources: Map(cpus -> name: cpus, amount: 1.0) | |
2023-05-17 16:30:07,180 INFO resource.ResourceProfile: Limiting resource is cpu | |
2023-05-17 16:30:07,181 INFO resource.ResourceProfileManager: Added ResourceProfile id: 0 | |
2023-05-17 16:30:07,227 INFO spark.SecurityManager: Changing view acls to: hadoop | |
2023-05-17 16:30:07,227 INFO spark.SecurityManager: Changing modify acls to: hadoop | |
2023-05-17 16:30:07,227 INFO spark.SecurityManager: Changing view acls groups to: | |
2023-05-17 16:30:07,228 INFO spark.SecurityManager: Changing modify acls groups to: | |
2023-05-17 16:30:07,228 INFO spark.SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(hadoop); groups with view permissions: Set(); users with modify permissions: Set(hadoop); groups with modify permissions: Set() | |
2023-05-17 16:30:07,236 INFO Configuration.deprecation: mapred.output.compression.codec is deprecated. Instead, use mapreduce.output.fileoutputformat.compress.codec | |
2023-05-17 16:30:07,236 INFO Configuration.deprecation: mapred.output.compress is deprecated. Instead, use mapreduce.output.fileoutputformat.compress | |
2023-05-17 16:30:07,236 INFO Configuration.deprecation: mapred.output.compression.type is deprecated. Instead, use mapreduce.output.fileoutputformat.compress.type | |
2023-05-17 16:30:07,397 INFO util.Utils: Successfully started service 'sparkDriver' on port 36997. | |
2023-05-17 16:30:07,427 INFO spark.SparkEnv: Registering MapOutputTracker | |
2023-05-17 16:30:07,464 INFO spark.SparkEnv: Registering BlockManagerMaster | |
2023-05-17 16:30:07,491 INFO storage.BlockManagerMasterEndpoint: Using org.apache.spark.storage.DefaultTopologyMapper for getting topology information | |
2023-05-17 16:30:07,492 INFO storage.BlockManagerMasterEndpoint: BlockManagerMasterEndpoint up | |
2023-05-17 16:30:07,495 INFO spark.SparkEnv: Registering BlockManagerMasterHeartbeat | |
2023-05-17 16:30:07,508 INFO storage.DiskBlockManager: Created local directory at /mnt/tmp/blockmgr-2c7b8787-5687-4922-91a8-62af764ef32e | |
2023-05-17 16:30:07,537 INFO memory.MemoryStore: MemoryStore started with capacity 366.3 MiB | |
2023-05-17 16:30:07,554 INFO spark.SparkEnv: Registering OutputCommitCoordinator | |
2023-05-17 16:30:07,644 INFO util.log: Logging initialized @2430ms to org.sparkproject.jetty.util.log.Slf4jLog | |
2023-05-17 16:30:07,721 INFO server.Server: jetty-9.4.40.v20210413; built: 2021-04-13T20:42:42.668Z; git: b881a572662e1943a14ae12e7e1207989f218b74; jvm 1.8.0_372-b07 | |
2023-05-17 16:30:07,740 INFO server.Server: Started @2527ms | |
2023-05-17 16:30:07,770 INFO server.AbstractConnector: Started ServerConnector@203dd56b{HTTP/1.1, (http/1.1)}{0.0.0.0:8090} | |
2023-05-17 16:30:07,770 INFO util.Utils: Successfully started service 'SparkUI' on port 8090. | |
2023-05-17 16:30:07,792 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@3eba57a7{/jobs,null,AVAILABLE,@Spark} | |
2023-05-17 16:30:07,794 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@43b0ade{/jobs/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:30:07,795 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@1517f633{/jobs/job,null,AVAILABLE,@Spark} | |
2023-05-17 16:30:07,796 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@6f6962ba{/jobs/job/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:30:07,796 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@a0a9fa5{/stages,null,AVAILABLE,@Spark} | |
2023-05-17 16:30:07,797 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@312afbc7{/stages/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:30:07,797 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@7b60c3e{/stages/stage,null,AVAILABLE,@Spark} | |
2023-05-17 16:30:07,798 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@710d7aff{/stages/stage/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:30:07,799 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@65327f5{/stages/pool,null,AVAILABLE,@Spark} | |
2023-05-17 16:30:07,800 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@301d8120{/stages/pool/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:30:07,800 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@72458efc{/storage,null,AVAILABLE,@Spark} | |
2023-05-17 16:30:07,801 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@6a74d228{/storage/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:30:07,802 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@6a714237{/storage/rdd,null,AVAILABLE,@Spark} | |
2023-05-17 16:30:07,803 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@72ba28ee{/storage/rdd/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:30:07,804 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@4ebadd3d{/environment,null,AVAILABLE,@Spark} | |
2023-05-17 16:30:07,804 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@4917d36b{/environment/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:30:07,805 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@2d0bfb24{/executors,null,AVAILABLE,@Spark} | |
2023-05-17 16:30:07,805 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@7b44b63d{/executors/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:30:07,806 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@38499e48{/executors/threadDump,null,AVAILABLE,@Spark} | |
2023-05-17 16:30:07,807 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@17ae7628{/executors/threadDump/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:30:07,814 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@6579c3d9{/static,null,AVAILABLE,@Spark} | |
2023-05-17 16:30:07,814 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@3e9803c2{/,null,AVAILABLE,@Spark} | |
2023-05-17 16:30:07,816 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@7c974942{/api,null,AVAILABLE,@Spark} | |
2023-05-17 16:30:07,817 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@7808fb9{/jobs/job/kill,null,AVAILABLE,@Spark} | |
2023-05-17 16:30:07,817 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@6b580b88{/stages/stage/kill,null,AVAILABLE,@Spark} | |
2023-05-17 16:30:07,819 INFO ui.SparkUI: Bound SparkUI to 0.0.0.0, and started at http://ip-172-31-19-77.us-east-2.compute.internal:8090 | |
2023-05-17 16:30:07,838 INFO spark.SparkContext: Added JAR file:/home/hadoop/v_3.1/hudi-utilities-bundle_2.12-0.13.0.jar at spark://ip-172-31-19-77.us-east-2.compute.internal:36997/jars/hudi-utilities-bundle_2.12-0.13.0.jar with timestamp 1684341007097 | |
2023-05-17 16:30:08,018 INFO executor.Executor: Starting executor ID driver on host ip-172-31-19-77.us-east-2.compute.internal | |
2023-05-17 16:30:08,038 INFO executor.Executor: Fetching spark://ip-172-31-19-77.us-east-2.compute.internal:36997/jars/hudi-utilities-bundle_2.12-0.13.0.jar with timestamp 1684341007097 | |
2023-05-17 16:30:08,082 INFO client.TransportClientFactory: Successfully created connection to ip-172-31-19-77.us-east-2.compute.internal/172.31.19.77:36997 after 27 ms (0 ms spent in bootstraps) | |
2023-05-17 16:30:08,089 INFO util.Utils: Fetching spark://ip-172-31-19-77.us-east-2.compute.internal:36997/jars/hudi-utilities-bundle_2.12-0.13.0.jar to /mnt/tmp/spark-4c1bf246-f34d-46d9-a57f-3ad6adfa8f06/userFiles-7aa4eba4-c6e8-4c31-9f0b-02d126da1095/fetchFileTemp3734544097314841264.tmp | |
2023-05-17 16:30:08,250 INFO executor.Executor: Adding file:/mnt/tmp/spark-4c1bf246-f34d-46d9-a57f-3ad6adfa8f06/userFiles-7aa4eba4-c6e8-4c31-9f0b-02d126da1095/hudi-utilities-bundle_2.12-0.13.0.jar to class loader | |
2023-05-17 16:30:08,258 INFO util.Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 34001. | |
2023-05-17 16:30:08,258 INFO netty.NettyBlockTransferService: Server created on ip-172-31-19-77.us-east-2.compute.internal:34001 | |
2023-05-17 16:30:08,260 INFO storage.BlockManager: Using org.apache.spark.storage.RandomBlockReplicationPolicy for block replication policy | |
2023-05-17 16:30:08,269 INFO storage.BlockManagerMaster: Registering BlockManager BlockManagerId(driver, ip-172-31-19-77.us-east-2.compute.internal, 34001, None) | |
2023-05-17 16:30:08,272 INFO storage.BlockManagerMasterEndpoint: Registering block manager ip-172-31-19-77.us-east-2.compute.internal:34001 with 366.3 MiB RAM, BlockManagerId(driver, ip-172-31-19-77.us-east-2.compute.internal, 34001, None) | |
2023-05-17 16:30:08,275 INFO storage.BlockManagerMaster: Registered BlockManager BlockManagerId(driver, ip-172-31-19-77.us-east-2.compute.internal, 34001, None) | |
2023-05-17 16:30:08,277 INFO storage.BlockManager: Initialized BlockManager: BlockManagerId(driver, ip-172-31-19-77.us-east-2.compute.internal, 34001, None) | |
2023-05-17 16:30:08,442 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@455824ad{/metrics/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:30:09,048 WARN config.DFSPropertiesConfiguration: Cannot find HUDI_CONF_DIR, please set it as the dir of hudi-defaults.conf | |
2023-05-17 16:30:09,065 INFO utilities.UtilHelpers: Adding overridden properties to file properties. | |
2023-05-17 16:30:09,124 WARN spark.SparkContext: Using an existing SparkContext; some configuration may not take effect. | |
2023-05-17 16:30:09,270 INFO deltastreamer.HoodieDeltaStreamer: Creating delta streamer with configs: | |
hoodie.auto.adjust.lock.configs: true | |
hoodie.cleaner.policy.failed.writes: EAGER | |
hoodie.datasource.hive_sync.auto_create_database: true | |
hoodie.datasource.hive_sync.database: default | |
hoodie.datasource.hive_sync.ignore_exceptions: false | |
hoodie.datasource.hive_sync.jdbcurl: jdbc:hive2://ip-172-31-19-77.us-east-2.compute.internal:10000 | |
hoodie.datasource.hive_sync.mode: hms | |
hoodie.datasource.hive_sync.partition_extractor_class: org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor | |
hoodie.datasource.hive_sync.partition_fields: date | |
hoodie.datasource.hive_sync.skip_ro_suffix: true | |
hoodie.datasource.hive_sync.sync_as_datasource: true | |
hoodie.datasource.hive_sync.sync_comment: true | |
hoodie.datasource.write.hive_style_partitioning: false | |
hoodie.datasource.write.keygenerator.type: SIMPLE | |
hoodie.datasource.write.partitionpath.field: date | |
hoodie.datasource.write.precombine.field: ts | |
hoodie.datasource.write.reconcile.schema: false | |
hoodie.datasource.write.recordkey.field: symbol | |
hoodie.deltastreamer.schemaprovider.source.schema.file: /tmp/schema.avsc | |
hoodie.deltastreamer.schemaprovider.target.schema.file: /tmp/schema.avsc | |
hoodie.deltastreamer.source.dfs.root: /tmp/source_parquet | |
hoodie.index.type: BLOOM | |
hoodie.metadata.enable: true | |
hoodie.write.concurrency.mode: single_writer | |
hoodie.write.lock.provider: org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider | |
hoodie.write.lock.zookeeper.base_path: /hudi | |
hoodie.write.lock.zookeeper.port: 2181 | |
hoodie.write.lock.zookeeper.url: ip-172-31-19-77.us-east-2.compute.internal | |
2023-05-17 16:30:09,276 INFO fs.FSUtils: Resolving file /tmp/schema.avscto be a remote file. | |
2023-05-17 16:30:09,537 INFO factory.HoodieSparkKeyGeneratorFactory: The value of hoodie.datasource.write.keygenerator.type is empty, use SIMPLE | |
2023-05-17 16:30:09,554 INFO table.HoodieTableMetaClient: Initializing /tmp/deltastreamertest/stocks20230517t163003 as hoodie table /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:09,661 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:09,674 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:09,679 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:09,679 INFO table.HoodieTableMetaClient: Finished initializing Table of type COPY_ON_WRITE from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:09,790 INFO helpers.DFSPathSelector: Using path selector org.apache.hudi.utilities.sources.helpers.DFSPathSelector | |
2023-05-17 16:30:09,790 INFO deltastreamer.HoodieDeltaStreamer: Delta Streamer running only single round | |
2023-05-17 16:30:09,792 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:09,794 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:09,797 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:09,815 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
2023-05-17 16:30:09,820 INFO deltastreamer.DeltaSync: Checkpoint to resume from : Optional.empty | |
2023-05-17 16:30:09,826 INFO helpers.DFSPathSelector: Root path => /tmp/source_parquet source limit => 9223372036854775807 | |
2023-05-17 16:30:10,262 INFO memory.MemoryStore: Block broadcast_0 stored as values in memory (estimated size 417.9 KiB, free 365.9 MiB) | |
2023-05-17 16:30:10,590 INFO memory.MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 45.8 KiB, free 365.8 MiB) | |
2023-05-17 16:30:10,593 INFO storage.BlockManagerInfo: Added broadcast_0_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 45.8 KiB, free: 366.3 MiB) | |
2023-05-17 16:30:10,597 INFO spark.SparkContext: Created broadcast 0 from textFile at JsonDFSSource.java:54 | |
2023-05-17 16:30:10,711 ERROR lzo.GPLNativeCodeLoader: Could not load native gpl library | |
java.lang.UnsatisfiedLinkError: no gplcompression in java.library.path | |
at java.lang.ClassLoader.loadLibrary(ClassLoader.java:1860) | |
at java.lang.Runtime.loadLibrary0(Runtime.java:843) | |
at java.lang.System.loadLibrary(System.java:1136) | |
at com.hadoop.compression.lzo.GPLNativeCodeLoader.<clinit>(GPLNativeCodeLoader.java:32) | |
at com.hadoop.compression.lzo.LzoCodec.<clinit>(LzoCodec.java:71) | |
at java.lang.Class.forName0(Native Method) | |
at java.lang.Class.forName(Class.java:348) | |
at org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:2532) | |
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2497) | |
at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:132) | |
at org.apache.hadoop.io.compress.CompressionCodecFactory.<init>(CompressionCodecFactory.java:180) | |
at org.apache.hadoop.mapred.TextInputFormat.configure(TextInputFormat.java:45) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) | |
at java.lang.reflect.Method.invoke(Method.java:498) | |
at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:110) | |
at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:79) | |
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:137) | |
at org.apache.spark.rdd.HadoopRDD.getInputFormat(HadoopRDD.scala:191) | |
at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:205) | |
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:300) | |
at scala.Option.getOrElse(Option.scala:189) | |
at org.apache.spark.rdd.RDD.partitions(RDD.scala:296) | |
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) | |
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:300) | |
at scala.Option.getOrElse(Option.scala:189) | |
at org.apache.spark.rdd.RDD.partitions(RDD.scala:296) | |
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) | |
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:300) | |
at scala.Option.getOrElse(Option.scala:189) | |
at org.apache.spark.rdd.RDD.partitions(RDD.scala:296) | |
at org.apache.spark.rdd.RDD.$anonfun$isEmpty$1(RDD.scala:1557) | |
at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) | |
at org.apache.spark.rdd.RDD.withScope(RDD.scala:414) | |
at org.apache.spark.rdd.RDD.isEmpty(RDD.scala:1557) | |
at org.apache.spark.api.java.JavaRDDLike.isEmpty(JavaRDDLike.scala:545) | |
at org.apache.spark.api.java.JavaRDDLike.isEmpty$(JavaRDDLike.scala:545) | |
at org.apache.spark.api.java.AbstractJavaRDDLike.isEmpty(JavaRDDLike.scala:45) | |
at org.apache.hudi.utilities.deltastreamer.DeltaSync.fetchFromSource(DeltaSync.java:545) | |
at org.apache.hudi.utilities.deltastreamer.DeltaSync.readFromSource(DeltaSync.java:460) | |
at org.apache.hudi.utilities.deltastreamer.DeltaSync.syncOnce(DeltaSync.java:364) | |
at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.lambda$sync$2(HoodieDeltaStreamer.java:215) | |
at org.apache.hudi.common.util.Option.ifPresent(Option.java:97) | |
at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.sync(HoodieDeltaStreamer.java:213) | |
at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.main(HoodieDeltaStreamer.java:592) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) | |
at java.lang.reflect.Method.invoke(Method.java:498) | |
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) | |
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:951) | |
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180) | |
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203) | |
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90) | |
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1039) | |
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1048) | |
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) | |
2023-05-17 16:30:10,713 ERROR lzo.LzoCodec: Cannot load native-lzo without native-hadoop | |
2023-05-17 16:30:10,756 INFO mapred.FileInputFormat: Total input files to process : 2 | |
2023-05-17 16:30:10,837 INFO spark.SparkContext: Starting job: isEmpty at DeltaSync.java:545 | |
2023-05-17 16:30:10,856 INFO scheduler.DAGScheduler: Got job 0 (isEmpty at DeltaSync.java:545) with 1 output partitions | |
2023-05-17 16:30:10,857 INFO scheduler.DAGScheduler: Final stage: ResultStage 0 (isEmpty at DeltaSync.java:545) | |
2023-05-17 16:30:10,857 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:30:10,859 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:30:10,866 INFO scheduler.DAGScheduler: Submitting ResultStage 0 (MapPartitionsRDD[2] at map at SourceFormatAdapter.java:67), which has no missing parents | |
2023-05-17 16:30:10,919 INFO memory.MemoryStore: Block broadcast_1 stored as values in memory (estimated size 5.8 KiB, free 365.8 MiB) | |
2023-05-17 16:30:10,925 INFO memory.MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 3.2 KiB, free 365.8 MiB) | |
2023-05-17 16:30:10,928 INFO storage.BlockManagerInfo: Added broadcast_1_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 3.2 KiB, free: 366.3 MiB) | |
2023-05-17 16:30:10,929 INFO spark.SparkContext: Created broadcast 1 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:10,948 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 0 (MapPartitionsRDD[2] at map at SourceFormatAdapter.java:67) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:30:10,949 INFO scheduler.TaskSchedulerImpl: Adding task set 0.0 with 1 tasks resource profile 0 | |
2023-05-17 16:30:11,026 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 0.0 (TID 0) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, ANY, 4555 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:11,050 INFO executor.Executor: Running task 0.0 in stage 0.0 (TID 0) | |
2023-05-17 16:30:11,448 INFO rdd.HadoopRDD: Input split: hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/source_parquet/batch_1.json:0+759994 | |
2023-05-17 16:30:11,546 INFO executor.Executor: Finished task 0.0 in stage 0.0 (TID 0). 1316 bytes result sent to driver | |
2023-05-17 16:30:11,578 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 0.0 (TID 0) in 572 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:30:11,581 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:11,596 INFO scheduler.DAGScheduler: ResultStage 0 (isEmpty at DeltaSync.java:545) finished in 0.693 s | |
2023-05-17 16:30:11,604 INFO scheduler.DAGScheduler: Job 0 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:30:11,605 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 0: Stage finished | |
2023-05-17 16:30:11,608 INFO scheduler.DAGScheduler: Job 0 finished: isEmpty at DeltaSync.java:545, took 0.770702 s | |
2023-05-17 16:30:11,635 INFO deltastreamer.DeltaSync: Setting up new Hoodie Write Client | |
2023-05-17 16:30:11,658 INFO embedded.EmbeddedTimelineService: Starting Timeline service !! | |
2023-05-17 16:30:11,660 INFO embedded.EmbeddedTimelineService: Overriding hostIp to (ip-172-31-19-77.us-east-2.compute.internal) found in spark-conf. It was null | |
2023-05-17 16:30:11,675 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:30:11,676 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:30:11,706 INFO util.log: Logging initialized @6493ms to org.apache.hudi.org.eclipse.jetty.util.log.Slf4jLog | |
2023-05-17 16:30:11,911 INFO javalin.Javalin: | |
__ __ _ __ __ | |
/ /____ _ _ __ ____ _ / /(_)____ / // / | |
__ / // __ `/| | / // __ `// // // __ \ / // /_ | |
/ /_/ // /_/ / | |/ // /_/ // // // / / / /__ __/ | |
\____/ \__,_/ |___/ \__,_//_//_//_/ /_/ /_/ | |
https://javalin.io/documentation | |
2023-05-17 16:30:11,914 INFO javalin.Javalin: Starting Javalin ... | |
2023-05-17 16:30:11,922 INFO javalin.Javalin: You are running Javalin 4.6.7 (released October 24, 2022. Your Javalin version is 205 days old. Consider checking for a newer version.). | |
2023-05-17 16:30:12,038 INFO server.Server: jetty-9.4.48.v20220622; built: 2022-06-21T20:42:25.880Z; git: 6b67c5719d1f4371b33655ff2d047d24e171e49a; jvm 1.8.0_372-b07 | |
2023-05-17 16:30:12,103 INFO storage.BlockManagerInfo: Removed broadcast_1_piece0 on ip-172-31-19-77.us-east-2.compute.internal:34001 in memory (size: 3.2 KiB, free: 366.3 MiB) | |
2023-05-17 16:30:12,218 INFO server.Server: Started @7005ms | |
2023-05-17 16:30:12,219 INFO javalin.Javalin: Listening on http://localhost:39525/ | |
2023-05-17 16:30:12,219 INFO javalin.Javalin: Javalin started in 306ms \o/ | |
2023-05-17 16:30:12,219 INFO service.TimelineService: Starting Timeline server on port :39525 | |
2023-05-17 16:30:12,219 INFO embedded.EmbeddedTimelineService: Started embedded timeline server at ip-172-31-19-77.us-east-2.compute.internal:39525 | |
2023-05-17 16:30:12,243 INFO client.BaseHoodieClient: Timeline Server already running. Not restarting the service | |
2023-05-17 16:30:12,270 INFO client.BaseHoodieClient: Timeline Server already running. Not restarting the service | |
2023-05-17 16:30:12,283 INFO spark.SparkContext: Starting job: isEmpty at DeltaSync.java:665 | |
2023-05-17 16:30:12,284 INFO scheduler.DAGScheduler: Got job 1 (isEmpty at DeltaSync.java:665) with 1 output partitions | |
2023-05-17 16:30:12,284 INFO scheduler.DAGScheduler: Final stage: ResultStage 1 (isEmpty at DeltaSync.java:665) | |
2023-05-17 16:30:12,284 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:30:12,285 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:30:12,286 INFO scheduler.DAGScheduler: Submitting ResultStage 1 (MapPartitionsRDD[3] at map at DeltaSync.java:558), which has no missing parents | |
2023-05-17 16:30:12,291 INFO memory.MemoryStore: Block broadcast_2 stored as values in memory (estimated size 12.2 KiB, free 365.8 MiB) | |
2023-05-17 16:30:12,293 INFO memory.MemoryStore: Block broadcast_2_piece0 stored as bytes in memory (estimated size 6.4 KiB, free 365.8 MiB) | |
2023-05-17 16:30:12,294 INFO storage.BlockManagerInfo: Added broadcast_2_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 6.4 KiB, free: 366.2 MiB) | |
2023-05-17 16:30:12,295 INFO spark.SparkContext: Created broadcast 2 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:12,296 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 1 (MapPartitionsRDD[3] at map at DeltaSync.java:558) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:30:12,296 INFO scheduler.TaskSchedulerImpl: Adding task set 1.0 with 1 tasks resource profile 0 | |
2023-05-17 16:30:12,298 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 1.0 (TID 1) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, ANY, 4555 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:12,299 INFO executor.Executor: Running task 0.0 in stage 1.0 (TID 1) | |
2023-05-17 16:30:12,306 INFO rdd.HadoopRDD: Input split: hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/source_parquet/batch_1.json:0+759994 | |
2023-05-17 16:30:12,353 INFO executor.Executor: Finished task 0.0 in stage 1.0 (TID 1). 1072 bytes result sent to driver | |
2023-05-17 16:30:12,374 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 1.0 (TID 1) in 76 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:30:12,375 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:12,376 INFO scheduler.DAGScheduler: ResultStage 1 (isEmpty at DeltaSync.java:665) finished in 0.087 s | |
2023-05-17 16:30:12,376 INFO scheduler.DAGScheduler: Job 1 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:30:12,376 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 1: Stage finished | |
2023-05-17 16:30:12,377 INFO scheduler.DAGScheduler: Job 1 finished: isEmpty at DeltaSync.java:665, took 0.093341 s | |
2023-05-17 16:30:12,381 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:12,384 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:12,387 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:12,387 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:12,389 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
2023-05-17 16:30:12,396 INFO util.CleanerUtils: Cleaned failed attempts if any | |
2023-05-17 16:30:12,401 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:12,404 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:12,407 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:12,407 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:12,408 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
2023-05-17 16:30:12,416 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:12,418 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:12,421 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:12,422 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:12,424 WARN metadata.HoodieBackedTableMetadata: Metadata table was not found at path /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:12,424 INFO view.FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
2023-05-17 16:30:12,424 INFO view.FileSystemViewManager: Creating remote first table view | |
2023-05-17 16:30:12,435 INFO client.BaseHoodieWriteClient: Generate a new instant time: 20230517163012379 action: commit | |
2023-05-17 16:30:12,436 INFO timeline.HoodieActiveTimeline: Creating a new instant [==>20230517163012379__commit__REQUESTED] | |
2023-05-17 16:30:12,443 INFO deltastreamer.DeltaSync: Starting commit : 20230517163012379 | |
2023-05-17 16:30:12,443 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:12,446 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:12,448 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:12,448 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:12,451 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517163012379__commit__REQUESTED]} | |
2023-05-17 16:30:12,529 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:12,531 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:12,534 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:12,545 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517163012379__commit__REQUESTED]} | |
2023-05-17 16:30:12,546 INFO metadata.HoodieBackedTableMetadataWriter: Creating a new metadata table in /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata at instant 00000000000000 | |
2023-05-17 16:30:12,546 INFO table.HoodieTableMetaClient: Initializing /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata as hoodie table /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:12,577 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:12,579 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:12,581 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:12,581 INFO table.HoodieTableMetaClient: Finished initializing Table of type MERGE_ON_READ from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:12,581 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:12,583 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:12,585 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:12,585 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:12,587 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:12,589 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:12,590 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
2023-05-17 16:30:12,596 INFO view.AbstractTableFileSystemView: Took 2 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:12,618 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:12,620 INFO metadata.HoodieBackedTableMetadataWriter: Creating 1 file groups for partition files with base fileId files- at instant time 00000000000000 | |
2023-05-17 16:30:12,624 INFO log.HoodieLogFormat$WriterBuilder: Building HoodieLogFormat Writer | |
2023-05-17 16:30:12,624 INFO log.HoodieLogFormat$WriterBuilder: HoodieLogFile on path /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0 | |
2023-05-17 16:30:12,628 INFO log.HoodieLogFormatWriter: HoodieLogFile{pathStr='/tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0', fileLen=0} does not exist. Create a new file | |
2023-05-17 16:30:12,659 INFO metadata.HoodieBackedTableMetadataWriter: Initializing metadata table by using file listings in /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:12,659 INFO metadata.HoodieBackedTableMetadataWriter: Triggering empty Commit to metadata to initialize | |
2023-05-17 16:30:12,662 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:12,663 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:12,664 INFO client.BaseHoodieClient: Embedded Timeline Server is disabled. Not starting timeline service | |
2023-05-17 16:30:12,665 INFO client.BaseHoodieClient: Embedded Timeline Server is disabled. Not starting timeline service | |
2023-05-17 16:30:12,666 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:12,668 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:12,670 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:12,670 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:12,672 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
2023-05-17 16:30:12,678 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:30:12,678 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:30:12,680 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:12,683 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:12,685 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:12,686 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:12,687 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
2023-05-17 16:30:12,687 INFO util.CleanerUtils: Cleaned failed attempts if any | |
2023-05-17 16:30:12,687 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:12,689 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:12,692 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:12,692 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:12,694 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
2023-05-17 16:30:12,694 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:30:12,694 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:30:12,695 INFO client.BaseHoodieWriteClient: Generate a new instant time: 00000000000000 action: deltacommit | |
2023-05-17 16:30:12,696 INFO timeline.HoodieActiveTimeline: Creating a new instant [==>00000000000000__deltacommit__REQUESTED] | |
2023-05-17 16:30:12,701 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:12,703 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:12,705 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:12,705 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:12,707 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[==>00000000000000__deltacommit__REQUESTED]} | |
2023-05-17 16:30:12,707 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:30:12,707 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:30:12,712 INFO async.AsyncCleanerService: The HoodieWriteClient is not configured to auto & async clean. Async clean service will not start. | |
2023-05-17 16:30:12,713 INFO async.AsyncArchiveService: The HoodieWriteClient is not configured to auto & async archive. Async archive service will not start. | |
2023-05-17 16:30:12,727 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:12,728 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:12,783 INFO spark.SparkContext: Starting job: countByKey at HoodieJavaPairRDD.java:105 | |
2023-05-17 16:30:12,790 INFO scheduler.DAGScheduler: Job 2 finished: countByKey at HoodieJavaPairRDD.java:105, took 0.006329 s | |
2023-05-17 16:30:12,794 INFO commit.BaseSparkCommitActionExecutor: Input workload profile :WorkloadProfile {globalStat=WorkloadStat {numInserts=0, numUpdates=0}, InputPartitionStat={}, OutputPartitionStat={}, operationType=UPSERT_PREPPED} | |
2023-05-17 16:30:12,796 INFO commit.UpsertPartitioner: AvgRecordSize => 1024 | |
2023-05-17 16:30:12,796 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:12,797 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:12,798 INFO commit.UpsertPartitioner: Total Buckets :0, buckets info => {}, | |
Partition to insert buckets => {}, | |
UpdateLocations mapped to buckets =>{} | |
2023-05-17 16:30:12,816 INFO timeline.HoodieActiveTimeline: Checking for file exists ?/tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/00000000000000.deltacommit.requested | |
2023-05-17 16:30:12,827 INFO util.FileIOUtils: Created a new file in meta path: /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/00000000000000.deltacommit.inflight | |
2023-05-17 16:30:13,239 INFO timeline.HoodieActiveTimeline: Create new file for toInstant ?/tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/00000000000000.deltacommit.inflight | |
2023-05-17 16:30:13,383 INFO commit.BaseSparkCommitActionExecutor: no validators configured. | |
2023-05-17 16:30:13,383 INFO commit.BaseCommitActionExecutor: Auto commit enabled: Committing 00000000000000 | |
2023-05-17 16:30:13,499 INFO spark.SparkContext: Starting job: collect at HoodieJavaRDD.java:163 | |
2023-05-17 16:30:13,501 INFO scheduler.DAGScheduler: Job 3 finished: collect at HoodieJavaRDD.java:163, took 0.001602 s | |
2023-05-17 16:30:13,502 INFO util.CommitUtils: Creating metadata for UPSERT_PREPPED numWriteStats:0 numReplaceFileIds:0 | |
2023-05-17 16:30:13,633 INFO spark.SparkContext: Starting job: collect at HoodieJavaRDD.java:163 | |
2023-05-17 16:30:13,636 INFO scheduler.DAGScheduler: Job 4 finished: collect at HoodieJavaRDD.java:163, took 0.000503 s | |
2023-05-17 16:30:13,636 INFO commit.BaseSparkCommitActionExecutor: Committing 00000000000000, action Type deltacommit, operation Type UPSERT_PREPPED | |
2023-05-17 16:30:13,643 INFO timeline.HoodieActiveTimeline: Marking instant complete [==>00000000000000__deltacommit__INFLIGHT] | |
2023-05-17 16:30:13,644 INFO timeline.HoodieActiveTimeline: Checking for file exists ?/tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/00000000000000.deltacommit.inflight | |
2023-05-17 16:30:13,665 INFO timeline.HoodieActiveTimeline: Create new file for toInstant ?/tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/00000000000000.deltacommit | |
2023-05-17 16:30:13,665 INFO timeline.HoodieActiveTimeline: Completed [==>00000000000000__deltacommit__INFLIGHT] | |
2023-05-17 16:30:13,665 INFO commit.BaseSparkCommitActionExecutor: Committed 00000000000000 | |
2023-05-17 16:30:13,667 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:13,668 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:13,672 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:13,672 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:13,673 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:30:13,674 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:30:13,674 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:30:13,768 INFO storage.BlockManagerInfo: Removed broadcast_2_piece0 on ip-172-31-19-77.us-east-2.compute.internal:34001 in memory (size: 6.4 KiB, free: 366.3 MiB) | |
2023-05-17 16:30:13,822 INFO spark.SparkContext: Starting job: collect at SparkHoodieBackedTableMetadataWriter.java:185 | |
2023-05-17 16:30:13,825 INFO scheduler.DAGScheduler: Job 5 finished: collect at SparkHoodieBackedTableMetadataWriter.java:185, took 0.003272 s | |
2023-05-17 16:30:13,828 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:30:14,276 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:14,278 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:14,280 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:14,280 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:14,285 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:14,287 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:14,290 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:30:14,290 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:14,291 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:14,291 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:14,293 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:14,295 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:14,295 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:14,296 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:14,298 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:14,299 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:30:14,300 INFO view.AbstractTableFileSystemView: Took 1 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:14,300 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:14,300 INFO view.FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
2023-05-17 16:30:14,300 INFO view.FileSystemViewManager: Creating remote first table view | |
2023-05-17 16:30:14,301 INFO async.AsyncCleanerService: The HoodieWriteClient is not configured to auto & async clean. Async clean service will not start. | |
2023-05-17 16:30:14,301 INFO async.AsyncArchiveService: The HoodieWriteClient is not configured to auto & async archive. Async archive service will not start. | |
2023-05-17 16:30:14,340 INFO spark.SparkContext: Starting job: countByKey at HoodieJavaPairRDD.java:105 | |
2023-05-17 16:30:14,344 INFO scheduler.DAGScheduler: Registering RDD 15 (mapToPair at HoodieJavaRDD.java:135) as input to shuffle 3 | |
2023-05-17 16:30:14,347 INFO scheduler.DAGScheduler: Registering RDD 19 (countByKey at HoodieJavaPairRDD.java:105) as input to shuffle 2 | |
2023-05-17 16:30:14,348 INFO scheduler.DAGScheduler: Got job 6 (countByKey at HoodieJavaPairRDD.java:105) with 2 output partitions | |
2023-05-17 16:30:14,348 INFO scheduler.DAGScheduler: Final stage: ResultStage 4 (countByKey at HoodieJavaPairRDD.java:105) | |
2023-05-17 16:30:14,348 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 3) | |
2023-05-17 16:30:14,349 INFO scheduler.DAGScheduler: Missing parents: List(ShuffleMapStage 3) | |
2023-05-17 16:30:14,355 INFO scheduler.DAGScheduler: Submitting ShuffleMapStage 2 (MapPartitionsRDD[15] at mapToPair at HoodieJavaRDD.java:135), which has no missing parents | |
2023-05-17 16:30:14,385 INFO memory.MemoryStore: Block broadcast_3 stored as values in memory (estimated size 31.3 KiB, free 365.8 MiB) | |
2023-05-17 16:30:14,387 INFO memory.MemoryStore: Block broadcast_3_piece0 stored as bytes in memory (estimated size 14.8 KiB, free 365.8 MiB) | |
2023-05-17 16:30:14,388 INFO storage.BlockManagerInfo: Added broadcast_3_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 14.8 KiB, free: 366.2 MiB) | |
2023-05-17 16:30:14,388 INFO spark.SparkContext: Created broadcast 3 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:14,390 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 2 (MapPartitionsRDD[15] at mapToPair at HoodieJavaRDD.java:135) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:30:14,390 INFO scheduler.TaskSchedulerImpl: Adding task set 2.0 with 2 tasks resource profile 0 | |
2023-05-17 16:30:14,392 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 2.0 (TID 2) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, ANY, 4544 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:14,393 INFO executor.Executor: Running task 0.0 in stage 2.0 (TID 2) | |
2023-05-17 16:30:14,462 INFO rdd.HadoopRDD: Input split: hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/source_parquet/batch_1.json:0+759994 | |
2023-05-17 16:30:14,781 INFO executor.Executor: Finished task 0.0 in stage 2.0 (TID 2). 1202 bytes result sent to driver | |
2023-05-17 16:30:14,782 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 2.0 (TID 3) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, ANY, 4544 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:14,783 INFO executor.Executor: Running task 1.0 in stage 2.0 (TID 3) | |
2023-05-17 16:30:14,787 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 2.0 (TID 2) in 396 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:30:14,793 INFO rdd.HadoopRDD: Input split: hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/source_parquet/batch_2.json:0+363815 | |
2023-05-17 16:30:14,874 INFO executor.Executor: Finished task 1.0 in stage 2.0 (TID 3). 1202 bytes result sent to driver | |
2023-05-17 16:30:14,876 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 2.0 (TID 3) in 94 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:30:14,877 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 2.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:14,878 INFO scheduler.DAGScheduler: ShuffleMapStage 2 (mapToPair at HoodieJavaRDD.java:135) finished in 0.518 s | |
2023-05-17 16:30:14,879 INFO scheduler.DAGScheduler: looking for newly runnable stages | |
2023-05-17 16:30:14,879 INFO scheduler.DAGScheduler: running: Set() | |
2023-05-17 16:30:14,879 INFO scheduler.DAGScheduler: waiting: Set(ShuffleMapStage 3, ResultStage 4) | |
2023-05-17 16:30:14,880 INFO scheduler.DAGScheduler: failed: Set() | |
2023-05-17 16:30:14,884 INFO scheduler.DAGScheduler: Submitting ShuffleMapStage 3 (MapPartitionsRDD[19] at countByKey at HoodieJavaPairRDD.java:105), which has no missing parents | |
2023-05-17 16:30:14,898 INFO memory.MemoryStore: Block broadcast_4 stored as values in memory (estimated size 25.4 KiB, free 365.8 MiB) | |
2023-05-17 16:30:14,900 INFO memory.MemoryStore: Block broadcast_4_piece0 stored as bytes in memory (estimated size 12.2 KiB, free 365.8 MiB) | |
2023-05-17 16:30:14,901 INFO storage.BlockManagerInfo: Added broadcast_4_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 12.2 KiB, free: 366.2 MiB) | |
2023-05-17 16:30:14,902 INFO spark.SparkContext: Created broadcast 4 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:14,902 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 3 (MapPartitionsRDD[19] at countByKey at HoodieJavaPairRDD.java:105) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:30:14,903 INFO scheduler.TaskSchedulerImpl: Adding task set 3.0 with 2 tasks resource profile 0 | |
2023-05-17 16:30:14,906 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 3.0 (TID 4) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, NODE_LOCAL, 4260 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:14,907 INFO executor.Executor: Running task 0.0 in stage 3.0 (TID 4) | |
2023-05-17 16:30:14,938 INFO storage.ShuffleBlockFetcherIterator: Getting 2 (7.1 KiB) non-empty blocks including 2 (7.1 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:30:14,939 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 7 ms | |
2023-05-17 16:30:14,974 INFO memory.MemoryStore: Block rdd_17_0 stored as values in memory (estimated size 8.2 KiB, free 365.8 MiB) | |
2023-05-17 16:30:14,974 INFO storage.BlockManagerInfo: Added rdd_17_0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 8.2 KiB, free: 366.2 MiB) | |
2023-05-17 16:30:14,983 INFO executor.Executor: Finished task 0.0 in stage 3.0 (TID 4). 1417 bytes result sent to driver | |
2023-05-17 16:30:14,984 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 3.0 (TID 5) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, NODE_LOCAL, 4260 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:14,984 INFO executor.Executor: Running task 1.0 in stage 3.0 (TID 5) | |
2023-05-17 16:30:14,984 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 3.0 (TID 4) in 79 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:30:14,990 INFO storage.ShuffleBlockFetcherIterator: Getting 2 (4.8 KiB) non-empty blocks including 2 (4.8 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:30:14,990 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:30:15,003 INFO memory.MemoryStore: Block rdd_17_1 stored as values in memory (estimated size 5.8 KiB, free 365.8 MiB) | |
2023-05-17 16:30:15,004 INFO storage.BlockManagerInfo: Added rdd_17_1 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 5.8 KiB, free: 366.2 MiB) | |
2023-05-17 16:30:15,008 INFO executor.Executor: Finished task 1.0 in stage 3.0 (TID 5). 1417 bytes result sent to driver | |
2023-05-17 16:30:15,010 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 3.0 (TID 5) in 27 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:30:15,010 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 3.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:15,011 INFO scheduler.DAGScheduler: ShuffleMapStage 3 (countByKey at HoodieJavaPairRDD.java:105) finished in 0.122 s | |
2023-05-17 16:30:15,012 INFO scheduler.DAGScheduler: looking for newly runnable stages | |
2023-05-17 16:30:15,012 INFO scheduler.DAGScheduler: running: Set() | |
2023-05-17 16:30:15,012 INFO scheduler.DAGScheduler: waiting: Set(ResultStage 4) | |
2023-05-17 16:30:15,012 INFO scheduler.DAGScheduler: failed: Set() | |
2023-05-17 16:30:15,012 INFO scheduler.DAGScheduler: Submitting ResultStage 4 (ShuffledRDD[20] at countByKey at HoodieJavaPairRDD.java:105), which has no missing parents | |
2023-05-17 16:30:15,015 INFO memory.MemoryStore: Block broadcast_5 stored as values in memory (estimated size 4.6 KiB, free 365.7 MiB) | |
2023-05-17 16:30:15,016 INFO memory.MemoryStore: Block broadcast_5_piece0 stored as bytes in memory (estimated size 2.6 KiB, free 365.7 MiB) | |
2023-05-17 16:30:15,016 INFO storage.BlockManagerInfo: Added broadcast_5_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 2.6 KiB, free: 366.2 MiB) | |
2023-05-17 16:30:15,017 INFO spark.SparkContext: Created broadcast 5 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:15,017 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ResultStage 4 (ShuffledRDD[20] at countByKey at HoodieJavaPairRDD.java:105) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:30:15,017 INFO scheduler.TaskSchedulerImpl: Adding task set 4.0 with 2 tasks resource profile 0 | |
2023-05-17 16:30:15,019 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 4.0 (TID 6) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, NODE_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:15,019 INFO executor.Executor: Running task 1.0 in stage 4.0 (TID 6) | |
2023-05-17 16:30:15,023 INFO storage.ShuffleBlockFetcherIterator: Getting 2 (120.0 B) non-empty blocks including 2 (120.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:30:15,023 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:30:15,029 INFO executor.Executor: Finished task 1.0 in stage 4.0 (TID 6). 1244 bytes result sent to driver | |
2023-05-17 16:30:15,030 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 4.0 (TID 7) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:15,030 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 4.0 (TID 6) in 12 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:30:15,031 INFO executor.Executor: Running task 0.0 in stage 4.0 (TID 7) | |
2023-05-17 16:30:15,033 INFO storage.ShuffleBlockFetcherIterator: Getting 0 (0.0 B) non-empty blocks including 0 (0.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:30:15,034 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:30:15,035 INFO executor.Executor: Finished task 0.0 in stage 4.0 (TID 7). 1227 bytes result sent to driver | |
2023-05-17 16:30:15,036 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 4.0 (TID 7) in 6 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:30:15,036 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 4.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:15,037 INFO scheduler.DAGScheduler: ResultStage 4 (countByKey at HoodieJavaPairRDD.java:105) finished in 0.024 s | |
2023-05-17 16:30:15,037 INFO scheduler.DAGScheduler: Job 6 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:30:15,038 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 4: Stage finished | |
2023-05-17 16:30:15,038 INFO scheduler.DAGScheduler: Job 6 finished: countByKey at HoodieJavaPairRDD.java:105, took 0.697813 s | |
2023-05-17 16:30:15,196 INFO spark.SparkContext: Starting job: collect at HoodieSparkEngineContext.java:137 | |
2023-05-17 16:30:15,197 INFO scheduler.DAGScheduler: Got job 7 (collect at HoodieSparkEngineContext.java:137) with 1 output partitions | |
2023-05-17 16:30:15,197 INFO scheduler.DAGScheduler: Final stage: ResultStage 5 (collect at HoodieSparkEngineContext.java:137) | |
2023-05-17 16:30:15,197 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:30:15,197 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:30:15,198 INFO scheduler.DAGScheduler: Submitting ResultStage 5 (MapPartitionsRDD[22] at flatMap at HoodieSparkEngineContext.java:137), which has no missing parents | |
2023-05-17 16:30:15,237 INFO memory.MemoryStore: Block broadcast_6 stored as values in memory (estimated size 559.0 KiB, free 365.2 MiB) | |
2023-05-17 16:30:15,241 INFO memory.MemoryStore: Block broadcast_6_piece0 stored as bytes in memory (estimated size 209.7 KiB, free 365.0 MiB) | |
2023-05-17 16:30:15,242 INFO storage.BlockManagerInfo: Added broadcast_6_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 209.7 KiB, free: 366.0 MiB) | |
2023-05-17 16:30:15,242 INFO spark.SparkContext: Created broadcast 6 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:15,243 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 5 (MapPartitionsRDD[22] at flatMap at HoodieSparkEngineContext.java:137) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:30:15,243 INFO scheduler.TaskSchedulerImpl: Adding task set 5.0 with 1 tasks resource profile 0 | |
2023-05-17 16:30:15,247 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 5.0 (TID 8) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4344 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:15,248 INFO executor.Executor: Running task 0.0 in stage 5.0 (TID 8) | |
2023-05-17 16:30:15,321 INFO executor.Executor: Finished task 0.0 in stage 5.0 (TID 8). 797 bytes result sent to driver | |
2023-05-17 16:30:15,322 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 5.0 (TID 8) in 78 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:30:15,322 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 5.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:15,323 INFO scheduler.DAGScheduler: ResultStage 5 (collect at HoodieSparkEngineContext.java:137) finished in 0.124 s | |
2023-05-17 16:30:15,324 INFO scheduler.DAGScheduler: Job 7 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:30:15,324 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 5: Stage finished | |
2023-05-17 16:30:15,324 INFO scheduler.DAGScheduler: Job 7 finished: collect at HoodieSparkEngineContext.java:137, took 0.128053 s | |
2023-05-17 16:30:15,496 INFO spark.SparkContext: Starting job: collect at HoodieSparkEngineContext.java:103 | |
2023-05-17 16:30:15,498 INFO scheduler.DAGScheduler: Got job 8 (collect at HoodieSparkEngineContext.java:103) with 1 output partitions | |
2023-05-17 16:30:15,498 INFO scheduler.DAGScheduler: Final stage: ResultStage 6 (collect at HoodieSparkEngineContext.java:103) | |
2023-05-17 16:30:15,498 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:30:15,498 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:30:15,499 INFO scheduler.DAGScheduler: Submitting ResultStage 6 (MapPartitionsRDD[24] at map at HoodieSparkEngineContext.java:103), which has no missing parents | |
2023-05-17 16:30:15,537 INFO memory.MemoryStore: Block broadcast_7 stored as values in memory (estimated size 558.8 KiB, free 364.4 MiB) | |
2023-05-17 16:30:15,542 INFO memory.MemoryStore: Block broadcast_7_piece0 stored as bytes in memory (estimated size 210.0 KiB, free 364.2 MiB) | |
2023-05-17 16:30:15,543 INFO storage.BlockManagerInfo: Added broadcast_7_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 210.0 KiB, free: 365.8 MiB) | |
2023-05-17 16:30:15,544 INFO spark.SparkContext: Created broadcast 7 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:15,544 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 6 (MapPartitionsRDD[24] at map at HoodieSparkEngineContext.java:103) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:30:15,544 INFO scheduler.TaskSchedulerImpl: Adding task set 6.0 with 1 tasks resource profile 0 | |
2023-05-17 16:30:15,546 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 6.0 (TID 9) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4332 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:15,546 INFO executor.Executor: Running task 0.0 in stage 6.0 (TID 9) | |
2023-05-17 16:30:15,629 INFO storage.BlockManagerInfo: Removed broadcast_5_piece0 on ip-172-31-19-77.us-east-2.compute.internal:34001 in memory (size: 2.6 KiB, free: 365.8 MiB) | |
2023-05-17 16:30:15,638 INFO executor.Executor: Finished task 0.0 in stage 6.0 (TID 9). 840 bytes result sent to driver | |
2023-05-17 16:30:15,639 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 6.0 (TID 9) in 94 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:30:15,639 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 6.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:15,640 INFO scheduler.DAGScheduler: ResultStage 6 (collect at HoodieSparkEngineContext.java:103) finished in 0.140 s | |
2023-05-17 16:30:15,640 INFO scheduler.DAGScheduler: Job 8 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:30:15,640 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 6: Stage finished | |
2023-05-17 16:30:15,645 INFO scheduler.DAGScheduler: Job 8 finished: collect at HoodieSparkEngineContext.java:103, took 0.148226 s | |
2023-05-17 16:30:15,658 INFO bloom.SparkHoodieBloomIndexHelper: Input parallelism: 2, Index parallelism: 2 | |
2023-05-17 16:30:15,679 INFO spark.SparkContext: Starting job: countByKey at SparkHoodieBloomIndexHelper.java:195 | |
2023-05-17 16:30:15,682 INFO scheduler.DAGScheduler: Registering RDD 27 (countByKey at SparkHoodieBloomIndexHelper.java:195) as input to shuffle 4 | |
2023-05-17 16:30:15,682 INFO scheduler.DAGScheduler: Got job 9 (countByKey at SparkHoodieBloomIndexHelper.java:195) with 2 output partitions | |
2023-05-17 16:30:15,682 INFO scheduler.DAGScheduler: Final stage: ResultStage 9 (countByKey at SparkHoodieBloomIndexHelper.java:195) | |
2023-05-17 16:30:15,682 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 8) | |
2023-05-17 16:30:15,682 INFO scheduler.DAGScheduler: Missing parents: List(ShuffleMapStage 8) | |
2023-05-17 16:30:15,686 INFO storage.BlockManager: Removing RDD 4 | |
2023-05-17 16:30:15,689 INFO scheduler.DAGScheduler: Submitting ShuffleMapStage 8 (MapPartitionsRDD[27] at countByKey at SparkHoodieBloomIndexHelper.java:195), which has no missing parents | |
2023-05-17 16:30:15,694 INFO memory.MemoryStore: Block broadcast_8 stored as values in memory (estimated size 27.0 KiB, free 364.2 MiB) | |
2023-05-17 16:30:15,696 INFO memory.MemoryStore: Block broadcast_8_piece0 stored as bytes in memory (estimated size 12.8 KiB, free 364.2 MiB) | |
2023-05-17 16:30:15,696 INFO storage.BlockManagerInfo: Added broadcast_8_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 12.8 KiB, free: 365.8 MiB) | |
2023-05-17 16:30:15,697 INFO spark.SparkContext: Created broadcast 8 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:15,697 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 8 (MapPartitionsRDD[27] at countByKey at SparkHoodieBloomIndexHelper.java:195) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:30:15,697 INFO scheduler.TaskSchedulerImpl: Adding task set 8.0 with 2 tasks resource profile 0 | |
2023-05-17 16:30:15,700 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 8.0 (TID 10) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4260 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:15,700 INFO executor.Executor: Running task 0.0 in stage 8.0 (TID 10) | |
2023-05-17 16:30:15,705 INFO storage.BlockManager: Found block rdd_17_0 locally | |
2023-05-17 16:30:15,712 INFO executor.Executor: Finished task 0.0 in stage 8.0 (TID 10). 1116 bytes result sent to driver | |
2023-05-17 16:30:15,714 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 8.0 (TID 11) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, PROCESS_LOCAL, 4260 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:15,714 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 8.0 (TID 10) in 14 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:30:15,718 INFO executor.Executor: Running task 1.0 in stage 8.0 (TID 11) | |
2023-05-17 16:30:15,722 INFO storage.BlockManagerInfo: Removed broadcast_3_piece0 on ip-172-31-19-77.us-east-2.compute.internal:34001 in memory (size: 14.8 KiB, free: 365.8 MiB) | |
2023-05-17 16:30:15,724 INFO storage.BlockManager: Found block rdd_17_1 locally | |
2023-05-17 16:30:15,730 INFO executor.Executor: Finished task 1.0 in stage 8.0 (TID 11). 1116 bytes result sent to driver | |
2023-05-17 16:30:15,732 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 8.0 (TID 11) in 19 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:30:15,732 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 8.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:15,733 INFO scheduler.DAGScheduler: ShuffleMapStage 8 (countByKey at SparkHoodieBloomIndexHelper.java:195) finished in 0.042 s | |
2023-05-17 16:30:15,733 INFO scheduler.DAGScheduler: looking for newly runnable stages | |
2023-05-17 16:30:15,733 INFO scheduler.DAGScheduler: running: Set() | |
2023-05-17 16:30:15,733 INFO scheduler.DAGScheduler: waiting: Set(ResultStage 9) | |
2023-05-17 16:30:15,733 INFO scheduler.DAGScheduler: failed: Set() | |
2023-05-17 16:30:15,734 INFO scheduler.DAGScheduler: Submitting ResultStage 9 (ShuffledRDD[28] at countByKey at SparkHoodieBloomIndexHelper.java:195), which has no missing parents | |
2023-05-17 16:30:15,735 INFO storage.BlockManagerInfo: Removed broadcast_6_piece0 on ip-172-31-19-77.us-east-2.compute.internal:34001 in memory (size: 209.7 KiB, free: 366.0 MiB) | |
2023-05-17 16:30:15,735 INFO memory.MemoryStore: Block broadcast_9 stored as values in memory (estimated size 4.6 KiB, free 364.5 MiB) | |
2023-05-17 16:30:15,737 INFO memory.MemoryStore: Block broadcast_9_piece0 stored as bytes in memory (estimated size 2.6 KiB, free 365.0 MiB) | |
2023-05-17 16:30:15,737 INFO storage.BlockManagerInfo: Added broadcast_9_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 2.6 KiB, free: 366.0 MiB) | |
2023-05-17 16:30:15,738 INFO spark.SparkContext: Created broadcast 9 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:15,738 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ResultStage 9 (ShuffledRDD[28] at countByKey at SparkHoodieBloomIndexHelper.java:195) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:30:15,738 INFO scheduler.TaskSchedulerImpl: Adding task set 9.0 with 2 tasks resource profile 0 | |
2023-05-17 16:30:15,739 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 9.0 (TID 12) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:15,740 INFO executor.Executor: Running task 0.0 in stage 9.0 (TID 12) | |
2023-05-17 16:30:15,740 INFO storage.BlockManager: Removing RDD 12 | |
2023-05-17 16:30:15,744 INFO storage.BlockManagerInfo: Removed broadcast_4_piece0 on ip-172-31-19-77.us-east-2.compute.internal:34001 in memory (size: 12.2 KiB, free: 366.0 MiB) | |
2023-05-17 16:30:15,744 INFO storage.ShuffleBlockFetcherIterator: Getting 0 (0.0 B) non-empty blocks including 0 (0.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:30:15,744 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:30:15,746 INFO executor.Executor: Finished task 0.0 in stage 9.0 (TID 12). 1227 bytes result sent to driver | |
2023-05-17 16:30:15,746 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 9.0 (TID 13) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:15,747 INFO executor.Executor: Running task 1.0 in stage 9.0 (TID 13) | |
2023-05-17 16:30:15,747 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 9.0 (TID 12) in 8 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:30:15,748 INFO storage.ShuffleBlockFetcherIterator: Getting 0 (0.0 B) non-empty blocks including 0 (0.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:30:15,748 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:30:15,749 INFO executor.Executor: Finished task 1.0 in stage 9.0 (TID 13). 1227 bytes result sent to driver | |
2023-05-17 16:30:15,750 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 9.0 (TID 13) in 4 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:30:15,750 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 9.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:15,751 INFO scheduler.DAGScheduler: ResultStage 9 (countByKey at SparkHoodieBloomIndexHelper.java:195) finished in 0.017 s | |
2023-05-17 16:30:15,751 INFO scheduler.DAGScheduler: Job 9 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:30:15,752 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 9: Stage finished | |
2023-05-17 16:30:15,752 INFO scheduler.DAGScheduler: Job 9 finished: countByKey at SparkHoodieBloomIndexHelper.java:195, took 0.072649 s | |
2023-05-17 16:30:15,753 INFO bloom.BucketizedBloomCheckPartitioner: TotalBuckets 0, min_buckets/partition 1 | |
2023-05-17 16:30:15,825 INFO rdd.MapPartitionsRDD: Removing RDD 17 from persistence list | |
2023-05-17 16:30:15,828 INFO rdd.MapPartitionsRDD: Removing RDD 35 from persistence list | |
2023-05-17 16:30:15,828 INFO storage.BlockManager: Removing RDD 17 | |
2023-05-17 16:30:15,829 INFO storage.BlockManager: Removing RDD 35 | |
2023-05-17 16:30:15,829 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:15,831 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:15,849 INFO spark.SparkContext: Starting job: countByKey at HoodieJavaPairRDD.java:105 | |
2023-05-17 16:30:15,852 INFO scheduler.DAGScheduler: Registering RDD 29 (mapToPair at SparkHoodieBloomIndexHelper.java:164) as input to shuffle 8 | |
2023-05-17 16:30:15,852 INFO scheduler.DAGScheduler: Registering RDD 36 (mapToPair at HoodieJavaRDD.java:135) as input to shuffle 5 | |
2023-05-17 16:30:15,852 INFO scheduler.DAGScheduler: Registering RDD 35 (flatMapToPair at SparkHoodieBloomIndexHelper.java:175) as input to shuffle 6 | |
2023-05-17 16:30:15,853 INFO scheduler.DAGScheduler: Registering RDD 45 (countByKey at HoodieJavaPairRDD.java:105) as input to shuffle 7 | |
2023-05-17 16:30:15,854 INFO scheduler.DAGScheduler: Got job 10 (countByKey at HoodieJavaPairRDD.java:105) with 2 output partitions | |
2023-05-17 16:30:15,854 INFO scheduler.DAGScheduler: Final stage: ResultStage 15 (countByKey at HoodieJavaPairRDD.java:105) | |
2023-05-17 16:30:15,854 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 14) | |
2023-05-17 16:30:15,854 INFO scheduler.DAGScheduler: Missing parents: List(ShuffleMapStage 14) | |
2023-05-17 16:30:15,856 INFO scheduler.DAGScheduler: Submitting ShuffleMapStage 12 (MapPartitionsRDD[36] at mapToPair at HoodieJavaRDD.java:135), which has no missing parents | |
2023-05-17 16:30:15,859 INFO memory.MemoryStore: Block broadcast_10 stored as values in memory (estimated size 25.1 KiB, free 365.0 MiB) | |
2023-05-17 16:30:15,861 INFO memory.MemoryStore: Block broadcast_10_piece0 stored as bytes in memory (estimated size 12.1 KiB, free 365.0 MiB) | |
2023-05-17 16:30:15,862 INFO storage.BlockManagerInfo: Added broadcast_10_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 12.1 KiB, free: 366.0 MiB) | |
2023-05-17 16:30:15,862 INFO spark.SparkContext: Created broadcast 10 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:15,862 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 12 (MapPartitionsRDD[36] at mapToPair at HoodieJavaRDD.java:135) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:30:15,863 INFO scheduler.TaskSchedulerImpl: Adding task set 12.0 with 2 tasks resource profile 0 | |
2023-05-17 16:30:15,864 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 12.0 (TID 14) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, NODE_LOCAL, 4260 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:15,864 INFO executor.Executor: Running task 0.0 in stage 12.0 (TID 14) | |
2023-05-17 16:30:15,868 INFO storage.ShuffleBlockFetcherIterator: Getting 2 (7.1 KiB) non-empty blocks including 2 (7.1 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:30:15,868 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:30:15,895 INFO executor.Executor: Finished task 0.0 in stage 12.0 (TID 14). 1417 bytes result sent to driver | |
2023-05-17 16:30:15,896 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 12.0 (TID 15) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, NODE_LOCAL, 4260 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:15,897 INFO executor.Executor: Running task 1.0 in stage 12.0 (TID 15) | |
2023-05-17 16:30:15,897 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 12.0 (TID 14) in 33 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:30:15,902 INFO storage.ShuffleBlockFetcherIterator: Getting 2 (4.8 KiB) non-empty blocks including 2 (4.8 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:30:15,902 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:30:15,914 INFO executor.Executor: Finished task 1.0 in stage 12.0 (TID 15). 1417 bytes result sent to driver | |
2023-05-17 16:30:15,915 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 12.0 (TID 15) in 18 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:30:15,915 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 12.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:15,915 INFO scheduler.DAGScheduler: ShuffleMapStage 12 (mapToPair at HoodieJavaRDD.java:135) finished in 0.057 s | |
2023-05-17 16:30:15,915 INFO scheduler.DAGScheduler: looking for newly runnable stages | |
2023-05-17 16:30:15,916 INFO scheduler.DAGScheduler: running: Set() | |
2023-05-17 16:30:15,916 INFO scheduler.DAGScheduler: waiting: Set(ResultStage 15, ShuffleMapStage 14) | |
2023-05-17 16:30:15,916 INFO scheduler.DAGScheduler: failed: Set() | |
2023-05-17 16:30:15,917 INFO scheduler.DAGScheduler: Submitting ShuffleMapStage 14 (MapPartitionsRDD[45] at countByKey at HoodieJavaPairRDD.java:105), which has no missing parents | |
2023-05-17 16:30:15,922 INFO memory.MemoryStore: Block broadcast_11 stored as values in memory (estimated size 9.9 KiB, free 365.0 MiB) | |
2023-05-17 16:30:15,924 INFO memory.MemoryStore: Block broadcast_11_piece0 stored as bytes in memory (estimated size 5.1 KiB, free 365.0 MiB) | |
2023-05-17 16:30:15,924 INFO storage.BlockManagerInfo: Added broadcast_11_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 5.1 KiB, free: 366.0 MiB) | |
2023-05-17 16:30:15,925 INFO spark.SparkContext: Created broadcast 11 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:15,925 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 14 (MapPartitionsRDD[45] at countByKey at HoodieJavaPairRDD.java:105) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:30:15,925 INFO scheduler.TaskSchedulerImpl: Adding task set 14.0 with 2 tasks resource profile 0 | |
2023-05-17 16:30:15,927 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 14.0 (TID 16) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4323 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:15,927 INFO executor.Executor: Running task 0.0 in stage 14.0 (TID 16) | |
2023-05-17 16:30:15,938 INFO storage.ShuffleBlockFetcherIterator: Getting 1 (3.5 KiB) non-empty blocks including 1 (3.5 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:30:15,939 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:30:15,939 INFO storage.ShuffleBlockFetcherIterator: Getting 0 (0.0 B) non-empty blocks including 0 (0.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:30:15,939 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:30:15,958 INFO memory.MemoryStore: Block rdd_43_0 stored as values in memory (estimated size 8.2 KiB, free 365.0 MiB) | |
2023-05-17 16:30:15,959 INFO storage.BlockManagerInfo: Added rdd_43_0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 8.2 KiB, free: 366.0 MiB) | |
2023-05-17 16:30:15,961 INFO executor.Executor: Finished task 0.0 in stage 14.0 (TID 16). 1417 bytes result sent to driver | |
2023-05-17 16:30:15,962 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 14.0 (TID 17) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, PROCESS_LOCAL, 4323 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:15,963 INFO executor.Executor: Running task 1.0 in stage 14.0 (TID 17) | |
2023-05-17 16:30:15,963 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 14.0 (TID 16) in 37 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:30:15,967 INFO storage.ShuffleBlockFetcherIterator: Getting 1 (2.4 KiB) non-empty blocks including 1 (2.4 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:30:15,967 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:30:15,968 INFO storage.ShuffleBlockFetcherIterator: Getting 0 (0.0 B) non-empty blocks including 0 (0.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:30:15,968 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:30:15,980 INFO memory.MemoryStore: Block rdd_43_1 stored as values in memory (estimated size 5.8 KiB, free 365.0 MiB) | |
2023-05-17 16:30:15,980 INFO storage.BlockManagerInfo: Added rdd_43_1 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 5.8 KiB, free: 366.0 MiB) | |
2023-05-17 16:30:15,984 INFO executor.Executor: Finished task 1.0 in stage 14.0 (TID 17). 1417 bytes result sent to driver | |
2023-05-17 16:30:15,985 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 14.0 (TID 17) in 23 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:30:15,985 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 14.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:15,985 INFO scheduler.DAGScheduler: ShuffleMapStage 14 (countByKey at HoodieJavaPairRDD.java:105) finished in 0.067 s | |
2023-05-17 16:30:15,986 INFO scheduler.DAGScheduler: looking for newly runnable stages | |
2023-05-17 16:30:15,986 INFO scheduler.DAGScheduler: running: Set() | |
2023-05-17 16:30:15,986 INFO scheduler.DAGScheduler: waiting: Set(ResultStage 15) | |
2023-05-17 16:30:15,986 INFO scheduler.DAGScheduler: failed: Set() | |
2023-05-17 16:30:15,986 INFO scheduler.DAGScheduler: Submitting ResultStage 15 (ShuffledRDD[46] at countByKey at HoodieJavaPairRDD.java:105), which has no missing parents | |
2023-05-17 16:30:15,988 INFO memory.MemoryStore: Block broadcast_12 stored as values in memory (estimated size 4.6 KiB, free 365.0 MiB) | |
2023-05-17 16:30:15,990 INFO memory.MemoryStore: Block broadcast_12_piece0 stored as bytes in memory (estimated size 2.6 KiB, free 365.0 MiB) | |
2023-05-17 16:30:15,990 INFO storage.BlockManagerInfo: Added broadcast_12_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 2.6 KiB, free: 366.0 MiB) | |
2023-05-17 16:30:15,991 INFO spark.SparkContext: Created broadcast 12 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:15,991 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ResultStage 15 (ShuffledRDD[46] at countByKey at HoodieJavaPairRDD.java:105) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:30:15,991 INFO scheduler.TaskSchedulerImpl: Adding task set 15.0 with 2 tasks resource profile 0 | |
2023-05-17 16:30:15,993 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 15.0 (TID 18) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, NODE_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:15,993 INFO executor.Executor: Running task 1.0 in stage 15.0 (TID 18) | |
2023-05-17 16:30:15,996 INFO storage.ShuffleBlockFetcherIterator: Getting 2 (194.0 B) non-empty blocks including 2 (194.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:30:15,996 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:30:16,002 INFO executor.Executor: Finished task 1.0 in stage 15.0 (TID 18). 1284 bytes result sent to driver | |
2023-05-17 16:30:16,003 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 15.0 (TID 19) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:16,003 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 15.0 (TID 18) in 11 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:30:16,004 INFO executor.Executor: Running task 0.0 in stage 15.0 (TID 19) | |
2023-05-17 16:30:16,006 INFO storage.ShuffleBlockFetcherIterator: Getting 0 (0.0 B) non-empty blocks including 0 (0.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:30:16,006 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:30:16,007 INFO executor.Executor: Finished task 0.0 in stage 15.0 (TID 19). 1227 bytes result sent to driver | |
2023-05-17 16:30:16,008 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 15.0 (TID 19) in 5 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:30:16,008 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 15.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:16,009 INFO scheduler.DAGScheduler: ResultStage 15 (countByKey at HoodieJavaPairRDD.java:105) finished in 0.022 s | |
2023-05-17 16:30:16,009 INFO scheduler.DAGScheduler: Job 10 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:30:16,009 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 15: Stage finished | |
2023-05-17 16:30:16,010 INFO scheduler.DAGScheduler: Job 10 finished: countByKey at HoodieJavaPairRDD.java:105, took 0.160281 s | |
2023-05-17 16:30:16,010 INFO commit.BaseSparkCommitActionExecutor: Input workload profile :WorkloadProfile {globalStat=WorkloadStat {numInserts=99, numUpdates=0}, InputPartitionStat={2018/08/31=WorkloadStat {numInserts=99, numUpdates=0}}, OutputPartitionStat={}, operationType=UPSERT} | |
2023-05-17 16:30:16,011 INFO commit.UpsertPartitioner: AvgRecordSize => 1024 | |
2023-05-17 16:30:16,124 INFO spark.SparkContext: Starting job: collectAsMap at UpsertPartitioner.java:279 | |
2023-05-17 16:30:16,124 INFO scheduler.DAGScheduler: Got job 11 (collectAsMap at UpsertPartitioner.java:279) with 1 output partitions | |
2023-05-17 16:30:16,124 INFO scheduler.DAGScheduler: Final stage: ResultStage 16 (collectAsMap at UpsertPartitioner.java:279) | |
2023-05-17 16:30:16,124 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:30:16,125 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:30:16,125 INFO scheduler.DAGScheduler: Submitting ResultStage 16 (MapPartitionsRDD[48] at mapToPair at UpsertPartitioner.java:278), which has no missing parents | |
2023-05-17 16:30:16,173 INFO memory.MemoryStore: Block broadcast_13 stored as values in memory (estimated size 559.7 KiB, free 364.4 MiB) | |
2023-05-17 16:30:16,177 INFO memory.MemoryStore: Block broadcast_13_piece0 stored as bytes in memory (estimated size 209.9 KiB, free 364.2 MiB) | |
2023-05-17 16:30:16,177 INFO storage.BlockManagerInfo: Added broadcast_13_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 209.9 KiB, free: 365.8 MiB) | |
2023-05-17 16:30:16,177 INFO spark.SparkContext: Created broadcast 13 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:16,178 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 16 (MapPartitionsRDD[48] at mapToPair at UpsertPartitioner.java:278) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:30:16,178 INFO scheduler.TaskSchedulerImpl: Adding task set 16.0 with 1 tasks resource profile 0 | |
2023-05-17 16:30:16,179 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 16.0 (TID 20) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4344 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:16,179 INFO executor.Executor: Running task 0.0 in stage 16.0 (TID 20) | |
2023-05-17 16:30:16,215 INFO executor.Executor: Finished task 0.0 in stage 16.0 (TID 20). 834 bytes result sent to driver | |
2023-05-17 16:30:16,218 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 16.0 (TID 20) in 39 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:30:16,218 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 16.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:16,219 INFO scheduler.DAGScheduler: ResultStage 16 (collectAsMap at UpsertPartitioner.java:279) finished in 0.093 s | |
2023-05-17 16:30:16,219 INFO scheduler.DAGScheduler: Job 11 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:30:16,219 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 16: Stage finished | |
2023-05-17 16:30:16,220 INFO scheduler.DAGScheduler: Job 11 finished: collectAsMap at UpsertPartitioner.java:279, took 0.095766 s | |
2023-05-17 16:30:16,221 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:16,223 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:16,223 INFO commit.UpsertPartitioner: For partitionPath : 2018/08/31 Small Files => [] | |
2023-05-17 16:30:16,223 INFO commit.UpsertPartitioner: After small file assignment: unassignedInserts => 99, totalInsertBuckets => 1, recordsPerBucket => 122880 | |
2023-05-17 16:30:16,228 INFO commit.UpsertPartitioner: Total insert buckets for partition path 2018/08/31 => [(InsertBucket {bucketNumber=0, weight=1.0},1.0)] | |
2023-05-17 16:30:16,228 INFO commit.UpsertPartitioner: Total Buckets :1, buckets info => {0=BucketInfo {bucketType=INSERT, fileIdPrefix=01f08b83-1a13-4b4f-a3b7-5ddd080217ea, partitionPath=2018/08/31}}, | |
Partition to insert buckets => {2018/08/31=[(InsertBucket {bucketNumber=0, weight=1.0},1.0)]}, | |
UpdateLocations mapped to buckets =>{} | |
2023-05-17 16:30:16,240 INFO timeline.HoodieActiveTimeline: Checking for file exists ?/tmp/deltastreamertest/stocks20230517t163003/.hoodie/20230517163012379.commit.requested | |
2023-05-17 16:30:16,264 INFO timeline.HoodieActiveTimeline: Create new file for toInstant ?/tmp/deltastreamertest/stocks20230517t163003/.hoodie/20230517163012379.inflight | |
2023-05-17 16:30:16,309 INFO commit.BaseSparkCommitActionExecutor: no validators configured. | |
2023-05-17 16:30:16,309 INFO commit.BaseCommitActionExecutor: Auto commit disabled for 20230517163012379 | |
2023-05-17 16:30:16,324 INFO spark.SparkContext: Starting job: sum at DeltaSync.java:696 | |
2023-05-17 16:30:16,327 INFO scheduler.DAGScheduler: Registering RDD 49 (mapToPair at HoodieJavaRDD.java:135) as input to shuffle 9 | |
2023-05-17 16:30:16,327 INFO scheduler.DAGScheduler: Got job 12 (sum at DeltaSync.java:696) with 1 output partitions | |
2023-05-17 16:30:16,327 INFO scheduler.DAGScheduler: Final stage: ResultStage 22 (sum at DeltaSync.java:696) | |
2023-05-17 16:30:16,327 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 21) | |
2023-05-17 16:30:16,328 INFO scheduler.DAGScheduler: Missing parents: List(ShuffleMapStage 21) | |
2023-05-17 16:30:16,333 INFO scheduler.DAGScheduler: Submitting ShuffleMapStage 21 (MapPartitionsRDD[49] at mapToPair at HoodieJavaRDD.java:135), which has no missing parents | |
2023-05-17 16:30:16,372 INFO memory.MemoryStore: Block broadcast_14 stored as values in memory (estimated size 565.7 KiB, free 363.7 MiB) | |
2023-05-17 16:30:16,376 INFO memory.MemoryStore: Block broadcast_14_piece0 stored as bytes in memory (estimated size 212.7 KiB, free 363.5 MiB) | |
2023-05-17 16:30:16,376 INFO storage.BlockManagerInfo: Added broadcast_14_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 212.7 KiB, free: 365.6 MiB) | |
2023-05-17 16:30:16,377 INFO spark.SparkContext: Created broadcast 14 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:16,377 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 21 (MapPartitionsRDD[49] at mapToPair at HoodieJavaRDD.java:135) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:30:16,377 INFO scheduler.TaskSchedulerImpl: Adding task set 21.0 with 2 tasks resource profile 0 | |
2023-05-17 16:30:16,379 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 21.0 (TID 21) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4323 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:16,379 INFO executor.Executor: Running task 0.0 in stage 21.0 (TID 21) | |
2023-05-17 16:30:16,409 INFO storage.BlockManager: Found block rdd_43_0 locally | |
2023-05-17 16:30:16,420 INFO executor.Executor: Finished task 0.0 in stage 21.0 (TID 21). 1072 bytes result sent to driver | |
2023-05-17 16:30:16,421 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 21.0 (TID 22) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, PROCESS_LOCAL, 4323 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:16,422 INFO executor.Executor: Running task 1.0 in stage 21.0 (TID 22) | |
2023-05-17 16:30:16,422 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 21.0 (TID 21) in 44 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:30:16,454 INFO storage.BlockManager: Found block rdd_43_1 locally | |
2023-05-17 16:30:16,462 INFO executor.Executor: Finished task 1.0 in stage 21.0 (TID 22). 1072 bytes result sent to driver | |
2023-05-17 16:30:16,463 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 21.0 (TID 22) in 42 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:30:16,463 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 21.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:16,464 INFO scheduler.DAGScheduler: ShuffleMapStage 21 (mapToPair at HoodieJavaRDD.java:135) finished in 0.129 s | |
2023-05-17 16:30:16,464 INFO scheduler.DAGScheduler: looking for newly runnable stages | |
2023-05-17 16:30:16,464 INFO scheduler.DAGScheduler: running: Set() | |
2023-05-17 16:30:16,464 INFO scheduler.DAGScheduler: waiting: Set(ResultStage 22) | |
2023-05-17 16:30:16,464 INFO scheduler.DAGScheduler: failed: Set() | |
2023-05-17 16:30:16,465 INFO scheduler.DAGScheduler: Submitting ResultStage 22 (MapPartitionsRDD[54] at mapToDouble at DeltaSync.java:696), which has no missing parents | |
2023-05-17 16:30:16,524 INFO memory.MemoryStore: Block broadcast_15 stored as values in memory (estimated size 573.2 KiB, free 362.9 MiB) | |
2023-05-17 16:30:16,528 INFO memory.MemoryStore: Block broadcast_15_piece0 stored as bytes in memory (estimated size 216.7 KiB, free 362.7 MiB) | |
2023-05-17 16:30:16,528 INFO storage.BlockManagerInfo: Added broadcast_15_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 216.7 KiB, free: 365.4 MiB) | |
2023-05-17 16:30:16,529 INFO spark.SparkContext: Created broadcast 15 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:16,529 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 22 (MapPartitionsRDD[54] at mapToDouble at DeltaSync.java:696) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:30:16,530 INFO scheduler.TaskSchedulerImpl: Adding task set 22.0 with 1 tasks resource profile 0 | |
2023-05-17 16:30:16,531 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 22.0 (TID 23) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, NODE_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:16,531 INFO executor.Executor: Running task 0.0 in stage 22.0 (TID 23) | |
2023-05-17 16:30:16,575 INFO storage.ShuffleBlockFetcherIterator: Getting 2 (6.2 KiB) non-empty blocks including 2 (6.2 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:30:16,576 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:30:16,594 INFO queue.SimpleExecutor: Starting consumer, consuming records from the records iterator directly | |
2023-05-17 16:30:16,638 WARN marker.WriteMarkersFactory: Timeline-server-based markers are not supported for HDFS: base path /tmp/deltastreamertest/stocks20230517t163003. Falling back to direct markers. | |
2023-05-17 16:30:16,644 INFO marker.DirectWriteMarkers: Creating Marker Path=/tmp/deltastreamertest/stocks20230517t163003/.hoodie/.temp/20230517163012379/2018/08/31/01f08b83-1a13-4b4f-a3b7-5ddd080217ea-0_0-22-23_20230517163012379.parquet.marker.CREATE | |
2023-05-17 16:30:16,651 INFO marker.DirectWriteMarkers: [direct] Created marker file /tmp/deltastreamertest/stocks20230517t163003/.hoodie/.temp/20230517163012379/2018/08/31/01f08b83-1a13-4b4f-a3b7-5ddd080217ea-0_0-22-23_20230517163012379.parquet.marker.CREATE in 12 ms | |
2023-05-17 16:30:16,804 INFO compress.CodecPool: Got brand-new compressor [.gz] | |
2023-05-17 16:30:16,818 INFO storage.BlockManagerInfo: Removed broadcast_8_piece0 on ip-172-31-19-77.us-east-2.compute.internal:34001 in memory (size: 12.8 KiB, free: 365.4 MiB) | |
2023-05-17 16:30:16,848 INFO storage.BlockManagerInfo: Removed broadcast_12_piece0 on ip-172-31-19-77.us-east-2.compute.internal:34001 in memory (size: 2.6 KiB, free: 365.4 MiB) | |
2023-05-17 16:30:16,871 INFO storage.BlockManagerInfo: Removed broadcast_7_piece0 on ip-172-31-19-77.us-east-2.compute.internal:34001 in memory (size: 210.0 KiB, free: 365.6 MiB) | |
2023-05-17 16:30:16,883 INFO storage.BlockManagerInfo: Removed broadcast_9_piece0 on ip-172-31-19-77.us-east-2.compute.internal:34001 in memory (size: 2.6 KiB, free: 365.6 MiB) | |
2023-05-17 16:30:16,890 INFO storage.BlockManagerInfo: Removed broadcast_11_piece0 on ip-172-31-19-77.us-east-2.compute.internal:34001 in memory (size: 5.1 KiB, free: 365.6 MiB) | |
2023-05-17 16:30:16,895 INFO storage.BlockManagerInfo: Removed broadcast_14_piece0 on ip-172-31-19-77.us-east-2.compute.internal:34001 in memory (size: 212.7 KiB, free: 365.8 MiB) | |
2023-05-17 16:30:16,900 INFO storage.BlockManagerInfo: Removed broadcast_10_piece0 on ip-172-31-19-77.us-east-2.compute.internal:34001 in memory (size: 12.1 KiB, free: 365.8 MiB) | |
2023-05-17 16:30:16,909 INFO storage.BlockManagerInfo: Removed broadcast_13_piece0 on ip-172-31-19-77.us-east-2.compute.internal:34001 in memory (size: 209.9 KiB, free: 366.0 MiB) | |
2023-05-17 16:30:17,184 INFO io.HoodieCreateHandle: New CreateHandle for partition :2018/08/31 with fileId 01f08b83-1a13-4b4f-a3b7-5ddd080217ea-0 | |
2023-05-17 16:30:17,251 INFO io.HoodieCreateHandle: Closing the file 01f08b83-1a13-4b4f-a3b7-5ddd080217ea-0 as we are done with all the records 99 | |
2023-05-17 16:30:17,251 INFO hadoop.InternalParquetRecordWriter: Flushing mem columnStore to file. allocated memory: 17352 | |
2023-05-17 16:30:17,372 INFO io.HoodieCreateHandle: CreateHandle for partitionPath 2018/08/31 fileID 01f08b83-1a13-4b4f-a3b7-5ddd080217ea-0, took 775 ms. | |
2023-05-17 16:30:17,374 INFO memory.MemoryStore: Block rdd_53_0 stored as values in memory (estimated size 376.0 B, free 365.1 MiB) | |
2023-05-17 16:30:17,374 INFO storage.BlockManagerInfo: Added rdd_53_0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 376.0 B, free: 366.0 MiB) | |
2023-05-17 16:30:17,382 INFO executor.Executor: Finished task 0.0 in stage 22.0 (TID 23). 1146 bytes result sent to driver | |
2023-05-17 16:30:17,384 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 22.0 (TID 23) in 854 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:30:17,384 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 22.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:17,384 INFO scheduler.DAGScheduler: ResultStage 22 (sum at DeltaSync.java:696) finished in 0.918 s | |
2023-05-17 16:30:17,385 INFO scheduler.DAGScheduler: Job 12 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:30:17,385 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 22: Stage finished | |
2023-05-17 16:30:17,385 INFO scheduler.DAGScheduler: Job 12 finished: sum at DeltaSync.java:696, took 1.061623 s | |
2023-05-17 16:30:17,393 INFO spark.SparkContext: Starting job: sum at DeltaSync.java:697 | |
2023-05-17 16:30:17,396 INFO scheduler.DAGScheduler: Got job 13 (sum at DeltaSync.java:697) with 1 output partitions | |
2023-05-17 16:30:17,396 INFO scheduler.DAGScheduler: Final stage: ResultStage 28 (sum at DeltaSync.java:697) | |
2023-05-17 16:30:17,396 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 27) | |
2023-05-17 16:30:17,397 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:30:17,397 INFO scheduler.DAGScheduler: Submitting ResultStage 28 (MapPartitionsRDD[56] at mapToDouble at DeltaSync.java:697), which has no missing parents | |
2023-05-17 16:30:17,458 INFO memory.MemoryStore: Block broadcast_16 stored as values in memory (estimated size 573.2 KiB, free 364.5 MiB) | |
2023-05-17 16:30:17,462 INFO memory.MemoryStore: Block broadcast_16_piece0 stored as bytes in memory (estimated size 216.7 KiB, free 364.3 MiB) | |
2023-05-17 16:30:17,463 INFO storage.BlockManagerInfo: Added broadcast_16_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 216.7 KiB, free: 365.8 MiB) | |
2023-05-17 16:30:17,463 INFO spark.SparkContext: Created broadcast 16 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:17,465 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 28 (MapPartitionsRDD[56] at mapToDouble at DeltaSync.java:697) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:30:17,465 INFO scheduler.TaskSchedulerImpl: Adding task set 28.0 with 1 tasks resource profile 0 | |
2023-05-17 16:30:17,466 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 28.0 (TID 24) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:17,467 INFO executor.Executor: Running task 0.0 in stage 28.0 (TID 24) | |
2023-05-17 16:30:17,517 INFO storage.BlockManager: Found block rdd_53_0 locally | |
2023-05-17 16:30:17,518 INFO executor.Executor: Finished task 0.0 in stage 28.0 (TID 24). 845 bytes result sent to driver | |
2023-05-17 16:30:17,519 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 28.0 (TID 24) in 53 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:30:17,519 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 28.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:17,519 INFO scheduler.DAGScheduler: ResultStage 28 (sum at DeltaSync.java:697) finished in 0.121 s | |
2023-05-17 16:30:17,520 INFO scheduler.DAGScheduler: Job 13 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:30:17,520 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 28: Stage finished | |
2023-05-17 16:30:17,520 INFO scheduler.DAGScheduler: Job 13 finished: sum at DeltaSync.java:697, took 0.126373 s | |
2023-05-17 16:30:17,606 INFO spark.SparkContext: Starting job: collect at SparkRDDWriteClient.java:101 | |
2023-05-17 16:30:17,608 INFO scheduler.DAGScheduler: Got job 14 (collect at SparkRDDWriteClient.java:101) with 1 output partitions | |
2023-05-17 16:30:17,609 INFO scheduler.DAGScheduler: Final stage: ResultStage 34 (collect at SparkRDDWriteClient.java:101) | |
2023-05-17 16:30:17,609 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 33) | |
2023-05-17 16:30:17,609 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:30:17,610 INFO scheduler.DAGScheduler: Submitting ResultStage 34 (MapPartitionsRDD[58] at map at SparkRDDWriteClient.java:101), which has no missing parents | |
2023-05-17 16:30:17,650 INFO memory.MemoryStore: Block broadcast_17 stored as values in memory (estimated size 573.3 KiB, free 363.7 MiB) | |
2023-05-17 16:30:17,653 INFO memory.MemoryStore: Block broadcast_17_piece0 stored as bytes in memory (estimated size 216.8 KiB, free 363.5 MiB) | |
2023-05-17 16:30:17,654 INFO storage.BlockManagerInfo: Added broadcast_17_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 216.8 KiB, free: 365.6 MiB) | |
2023-05-17 16:30:17,654 INFO spark.SparkContext: Created broadcast 17 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:17,654 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 34 (MapPartitionsRDD[58] at map at SparkRDDWriteClient.java:101) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:30:17,654 INFO scheduler.TaskSchedulerImpl: Adding task set 34.0 with 1 tasks resource profile 0 | |
2023-05-17 16:30:17,656 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 34.0 (TID 25) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:17,656 INFO executor.Executor: Running task 0.0 in stage 34.0 (TID 25) | |
2023-05-17 16:30:17,677 INFO storage.BlockManager: Found block rdd_53_0 locally | |
2023-05-17 16:30:17,679 INFO executor.Executor: Finished task 0.0 in stage 34.0 (TID 25). 1162 bytes result sent to driver | |
2023-05-17 16:30:17,680 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 34.0 (TID 25) in 25 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:30:17,680 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 34.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:17,680 INFO scheduler.DAGScheduler: ResultStage 34 (collect at SparkRDDWriteClient.java:101) finished in 0.069 s | |
2023-05-17 16:30:17,681 INFO scheduler.DAGScheduler: Job 14 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:30:17,681 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 34: Stage finished | |
2023-05-17 16:30:17,681 INFO scheduler.DAGScheduler: Job 14 finished: collect at SparkRDDWriteClient.java:101, took 0.074913 s | |
2023-05-17 16:30:17,681 INFO client.BaseHoodieWriteClient: Committing 20230517163012379 action commit | |
2023-05-17 16:30:17,681 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:17,683 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:17,685 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:17,685 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:17,687 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517163012379__commit__INFLIGHT]} | |
2023-05-17 16:30:17,687 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:17,689 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:17,690 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:17,691 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,692 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:17,693 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,694 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:30:17,695 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:17,695 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:17,695 INFO view.FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
2023-05-17 16:30:17,695 INFO view.FileSystemViewManager: Creating remote first table view | |
2023-05-17 16:30:17,696 INFO util.CommitUtils: Creating metadata for UPSERT numWriteStats:1 numReplaceFileIds:0 | |
2023-05-17 16:30:17,698 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:17,699 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:17,700 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:17,700 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:17,702 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517163012379__commit__INFLIGHT]} | |
2023-05-17 16:30:17,702 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:17,703 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:17,705 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:17,705 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,706 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:17,708 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,709 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:30:17,709 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:17,710 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:17,710 INFO view.FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
2023-05-17 16:30:17,710 INFO view.FileSystemViewManager: Creating remote first table view | |
2023-05-17 16:30:17,710 INFO client.BaseHoodieWriteClient: Committing 20230517163012379 action commit | |
2023-05-17 16:30:17,710 WARN marker.WriteMarkersFactory: Timeline-server-based markers are not supported for HDFS: base path /tmp/deltastreamertest/stocks20230517t163003. Falling back to direct markers. | |
2023-05-17 16:30:17,753 INFO spark.SparkContext: Starting job: collect at HoodieSparkEngineContext.java:137 | |
2023-05-17 16:30:17,754 INFO scheduler.DAGScheduler: Got job 15 (collect at HoodieSparkEngineContext.java:137) with 1 output partitions | |
2023-05-17 16:30:17,754 INFO scheduler.DAGScheduler: Final stage: ResultStage 35 (collect at HoodieSparkEngineContext.java:137) | |
2023-05-17 16:30:17,754 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:30:17,754 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:30:17,755 INFO scheduler.DAGScheduler: Submitting ResultStage 35 (MapPartitionsRDD[60] at flatMap at HoodieSparkEngineContext.java:137), which has no missing parents | |
2023-05-17 16:30:17,769 INFO memory.MemoryStore: Block broadcast_18 stored as values in memory (estimated size 128.1 KiB, free 363.4 MiB) | |
2023-05-17 16:30:17,771 INFO memory.MemoryStore: Block broadcast_18_piece0 stored as bytes in memory (estimated size 48.2 KiB, free 363.3 MiB) | |
2023-05-17 16:30:17,771 INFO storage.BlockManagerInfo: Added broadcast_18_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 48.2 KiB, free: 365.6 MiB) | |
2023-05-17 16:30:17,772 INFO spark.SparkContext: Created broadcast 18 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:17,772 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 35 (MapPartitionsRDD[60] at flatMap at HoodieSparkEngineContext.java:137) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:30:17,772 INFO scheduler.TaskSchedulerImpl: Adding task set 35.0 with 1 tasks resource profile 0 | |
2023-05-17 16:30:17,773 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 35.0 (TID 26) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4471 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:17,774 INFO executor.Executor: Running task 0.0 in stage 35.0 (TID 26) | |
2023-05-17 16:30:17,793 INFO executor.Executor: Finished task 0.0 in stage 35.0 (TID 26). 884 bytes result sent to driver | |
2023-05-17 16:30:17,794 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 35.0 (TID 26) in 21 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:30:17,794 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 35.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:17,795 INFO scheduler.DAGScheduler: ResultStage 35 (collect at HoodieSparkEngineContext.java:137) finished in 0.040 s | |
2023-05-17 16:30:17,795 INFO scheduler.DAGScheduler: Job 15 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:30:17,795 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 35: Stage finished | |
2023-05-17 16:30:17,796 INFO scheduler.DAGScheduler: Job 15 finished: collect at HoodieSparkEngineContext.java:137, took 0.041942 s | |
2023-05-17 16:30:17,800 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:17,801 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:17,803 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:17,803 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,804 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:17,806 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,807 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:30:17,807 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:17,808 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:17,808 INFO metadata.HoodieTableMetadataUtil: Loading latest file slices for metadata table partition files | |
2023-05-17 16:30:17,809 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:17,809 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:17,809 INFO view.AbstractTableFileSystemView: Building file system view for partition (files) | |
2023-05-17 16:30:17,814 INFO view.AbstractTableFileSystemView: addFilesToView: NumFiles=1, NumFileGroups=1, FileGroupsCreationTime=4, StoreTimeTaken=0 | |
2023-05-17 16:30:17,818 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,819 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:17,820 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,821 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:30:17,822 INFO metadata.HoodieBackedTableMetadataWriter: Async metadata indexing enabled and following partitions already initialized: [files] | |
2023-05-17 16:30:17,822 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:17,823 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:17,825 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:17,825 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,826 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:17,827 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,828 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:30:17,829 INFO view.AbstractTableFileSystemView: Took 1 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:17,829 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:17,853 INFO metadata.HoodieTableMetadataUtil: Updating at 20230517163012379 from Commit/UPSERT. #partitions_updated=2 | |
2023-05-17 16:30:17,855 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:17,855 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:17,855 INFO metadata.HoodieTableMetadataUtil: Loading latest file slices for metadata table partition files | |
2023-05-17 16:30:17,856 INFO view.AbstractTableFileSystemView: Took 1 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:17,856 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:17,856 INFO view.AbstractTableFileSystemView: Building file system view for partition (files) | |
2023-05-17 16:30:17,857 INFO view.AbstractTableFileSystemView: addFilesToView: NumFiles=1, NumFileGroups=1, FileGroupsCreationTime=0, StoreTimeTaken=0 | |
2023-05-17 16:30:17,870 INFO client.BaseHoodieClient: Embedded Timeline Server is disabled. Not starting timeline service | |
2023-05-17 16:30:17,871 INFO client.BaseHoodieClient: Embedded Timeline Server is disabled. Not starting timeline service | |
2023-05-17 16:30:17,871 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,872 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:17,874 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,874 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,876 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:30:17,876 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:30:17,876 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:30:17,877 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,878 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:17,880 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,880 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,881 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:30:17,881 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:30:17,881 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:30:17,885 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:30:17,887 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517163012379__commit__INFLIGHT]} | |
2023-05-17 16:30:17,889 INFO client.BaseHoodieWriteClient: Scheduling table service COMPACT | |
2023-05-17 16:30:17,890 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,891 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:17,893 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,893 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,894 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:30:17,894 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:30:17,894 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:30:17,896 INFO client.BaseHoodieWriteClient: Scheduling compaction at instant time :00000000000000001 | |
2023-05-17 16:30:17,904 INFO compact.ScheduleCompactionActionExecutor: Checking if compaction needs to be run on /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,909 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,911 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:17,913 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,913 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,914 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:30:17,914 INFO util.CleanerUtils: Cleaned failed attempts if any | |
2023-05-17 16:30:17,914 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,915 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:17,917 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,917 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,918 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:30:17,919 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:30:17,919 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:30:17,919 INFO client.BaseHoodieWriteClient: Generate a new instant time: 20230517163012379 action: deltacommit | |
2023-05-17 16:30:17,920 INFO timeline.HoodieActiveTimeline: Creating a new instant [==>20230517163012379__deltacommit__REQUESTED] | |
2023-05-17 16:30:17,927 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,928 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:17,930 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,930 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:17,933 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517163012379__deltacommit__REQUESTED]} | |
2023-05-17 16:30:17,934 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:30:17,934 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:30:17,977 INFO async.AsyncCleanerService: The HoodieWriteClient is not configured to auto & async clean. Async clean service will not start. | |
2023-05-17 16:30:17,977 INFO async.AsyncArchiveService: The HoodieWriteClient is not configured to auto & async archive. Async archive service will not start. | |
2023-05-17 16:30:17,979 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:17,980 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:18,000 INFO spark.SparkContext: Starting job: countByKey at HoodieJavaPairRDD.java:105 | |
2023-05-17 16:30:18,002 INFO scheduler.DAGScheduler: Registering RDD 66 (countByKey at HoodieJavaPairRDD.java:105) as input to shuffle 10 | |
2023-05-17 16:30:18,003 INFO scheduler.DAGScheduler: Got job 16 (countByKey at HoodieJavaPairRDD.java:105) with 1 output partitions | |
2023-05-17 16:30:18,003 INFO scheduler.DAGScheduler: Final stage: ResultStage 37 (countByKey at HoodieJavaPairRDD.java:105) | |
2023-05-17 16:30:18,003 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 36) | |
2023-05-17 16:30:18,003 INFO scheduler.DAGScheduler: Missing parents: List(ShuffleMapStage 36) | |
2023-05-17 16:30:18,004 INFO scheduler.DAGScheduler: Submitting ShuffleMapStage 36 (MapPartitionsRDD[66] at countByKey at HoodieJavaPairRDD.java:105), which has no missing parents | |
2023-05-17 16:30:18,007 INFO memory.MemoryStore: Block broadcast_19 stored as values in memory (estimated size 9.5 KiB, free 363.3 MiB) | |
2023-05-17 16:30:18,009 INFO memory.MemoryStore: Block broadcast_19_piece0 stored as bytes in memory (estimated size 5.2 KiB, free 363.3 MiB) | |
2023-05-17 16:30:18,009 INFO storage.BlockManagerInfo: Added broadcast_19_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 5.2 KiB, free: 365.6 MiB) | |
2023-05-17 16:30:18,010 INFO spark.SparkContext: Created broadcast 19 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:18,010 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ShuffleMapStage 36 (MapPartitionsRDD[66] at countByKey at HoodieJavaPairRDD.java:105) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:30:18,011 INFO scheduler.TaskSchedulerImpl: Adding task set 36.0 with 1 tasks resource profile 0 | |
2023-05-17 16:30:18,015 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 36.0 (TID 27) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4687 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:18,015 INFO executor.Executor: Running task 0.0 in stage 36.0 (TID 27) | |
2023-05-17 16:30:18,024 INFO memory.MemoryStore: Block rdd_64_0 stored as values in memory (estimated size 380.0 B, free 363.3 MiB) | |
2023-05-17 16:30:18,024 INFO storage.BlockManagerInfo: Added rdd_64_0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 380.0 B, free: 365.6 MiB) | |
2023-05-17 16:30:18,032 INFO executor.Executor: Finished task 0.0 in stage 36.0 (TID 27). 1115 bytes result sent to driver | |
2023-05-17 16:30:18,033 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 36.0 (TID 27) in 22 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:30:18,033 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 36.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:18,034 INFO scheduler.DAGScheduler: ShuffleMapStage 36 (countByKey at HoodieJavaPairRDD.java:105) finished in 0.029 s | |
2023-05-17 16:30:18,035 INFO scheduler.DAGScheduler: looking for newly runnable stages | |
2023-05-17 16:30:18,035 INFO scheduler.DAGScheduler: running: Set() | |
2023-05-17 16:30:18,035 INFO scheduler.DAGScheduler: waiting: Set(ResultStage 37) | |
2023-05-17 16:30:18,035 INFO scheduler.DAGScheduler: failed: Set() | |
2023-05-17 16:30:18,035 INFO scheduler.DAGScheduler: Submitting ResultStage 37 (ShuffledRDD[67] at countByKey at HoodieJavaPairRDD.java:105), which has no missing parents | |
2023-05-17 16:30:18,037 INFO memory.MemoryStore: Block broadcast_20 stored as values in memory (estimated size 4.6 KiB, free 363.3 MiB) | |
2023-05-17 16:30:18,039 INFO memory.MemoryStore: Block broadcast_20_piece0 stored as bytes in memory (estimated size 2.6 KiB, free 363.3 MiB) | |
2023-05-17 16:30:18,039 INFO storage.BlockManagerInfo: Added broadcast_20_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 2.6 KiB, free: 365.6 MiB) | |
2023-05-17 16:30:18,040 INFO spark.SparkContext: Created broadcast 20 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:18,042 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 37 (ShuffledRDD[67] at countByKey at HoodieJavaPairRDD.java:105) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:30:18,042 INFO scheduler.TaskSchedulerImpl: Adding task set 37.0 with 1 tasks resource profile 0 | |
2023-05-17 16:30:18,043 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 37.0 (TID 28) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, NODE_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:18,044 INFO executor.Executor: Running task 0.0 in stage 37.0 (TID 28) | |
2023-05-17 16:30:18,046 INFO storage.ShuffleBlockFetcherIterator: Getting 1 (117.0 B) non-empty blocks including 1 (117.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:30:18,046 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:30:18,054 INFO executor.Executor: Finished task 0.0 in stage 37.0 (TID 28). 1303 bytes result sent to driver | |
2023-05-17 16:30:18,055 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 37.0 (TID 28) in 12 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:30:18,055 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 37.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:18,056 INFO scheduler.DAGScheduler: ResultStage 37 (countByKey at HoodieJavaPairRDD.java:105) finished in 0.020 s | |
2023-05-17 16:30:18,056 INFO scheduler.DAGScheduler: Job 16 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:30:18,056 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 37: Stage finished | |
2023-05-17 16:30:18,057 INFO scheduler.DAGScheduler: Job 16 finished: countByKey at HoodieJavaPairRDD.java:105, took 0.056476 s | |
2023-05-17 16:30:18,058 INFO commit.BaseSparkCommitActionExecutor: Input workload profile :WorkloadProfile {globalStat=WorkloadStat {numInserts=0, numUpdates=2}, InputPartitionStat={files=WorkloadStat {numInserts=0, numUpdates=2}}, OutputPartitionStat={}, operationType=UPSERT_PREPPED} | |
2023-05-17 16:30:18,058 INFO commit.UpsertPartitioner: AvgRecordSize => 1024 | |
2023-05-17 16:30:18,156 INFO spark.SparkContext: Starting job: collectAsMap at UpsertPartitioner.java:279 | |
2023-05-17 16:30:18,157 INFO scheduler.DAGScheduler: Got job 17 (collectAsMap at UpsertPartitioner.java:279) with 1 output partitions | |
2023-05-17 16:30:18,157 INFO scheduler.DAGScheduler: Final stage: ResultStage 38 (collectAsMap at UpsertPartitioner.java:279) | |
2023-05-17 16:30:18,157 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:30:18,157 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:30:18,158 INFO scheduler.DAGScheduler: Submitting ResultStage 38 (MapPartitionsRDD[69] at mapToPair at UpsertPartitioner.java:278), which has no missing parents | |
2023-05-17 16:30:18,188 INFO memory.MemoryStore: Block broadcast_21 stored as values in memory (estimated size 431.5 KiB, free 362.9 MiB) | |
2023-05-17 16:30:18,191 INFO memory.MemoryStore: Block broadcast_21_piece0 stored as bytes in memory (estimated size 161.5 KiB, free 362.7 MiB) | |
2023-05-17 16:30:18,191 INFO storage.BlockManagerInfo: Added broadcast_21_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 161.5 KiB, free: 365.4 MiB) | |
2023-05-17 16:30:18,192 INFO spark.SparkContext: Created broadcast 21 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:18,192 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 38 (MapPartitionsRDD[69] at mapToPair at UpsertPartitioner.java:278) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:30:18,192 INFO scheduler.TaskSchedulerImpl: Adding task set 38.0 with 1 tasks resource profile 0 | |
2023-05-17 16:30:18,194 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 38.0 (TID 29) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4339 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:18,194 INFO executor.Executor: Running task 0.0 in stage 38.0 (TID 29) | |
2023-05-17 16:30:18,209 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:30:18,210 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:30:18,210 INFO view.FileSystemViewManager: Creating InMemory based view for basePath /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:18,210 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:18,212 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:18,212 INFO view.AbstractTableFileSystemView: Building file system view for partition (files) | |
2023-05-17 16:30:18,214 INFO view.AbstractTableFileSystemView: addFilesToView: NumFiles=1, NumFileGroups=1, FileGroupsCreationTime=1, StoreTimeTaken=0 | |
2023-05-17 16:30:18,219 INFO executor.Executor: Finished task 0.0 in stage 38.0 (TID 29). 829 bytes result sent to driver | |
2023-05-17 16:30:18,220 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 38.0 (TID 29) in 27 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:30:18,220 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 38.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:18,220 INFO scheduler.DAGScheduler: ResultStage 38 (collectAsMap at UpsertPartitioner.java:279) finished in 0.062 s | |
2023-05-17 16:30:18,220 INFO scheduler.DAGScheduler: Job 17 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:30:18,220 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 38: Stage finished | |
2023-05-17 16:30:18,220 INFO scheduler.DAGScheduler: Job 17 finished: collectAsMap at UpsertPartitioner.java:279, took 0.063969 s | |
2023-05-17 16:30:18,221 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:18,221 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:18,221 INFO commit.UpsertPartitioner: Total Buckets :1, buckets info => {0=BucketInfo {bucketType=UPDATE, fileIdPrefix=files-0000, partitionPath=files}}, | |
Partition to insert buckets => {}, | |
UpdateLocations mapped to buckets =>{files-0000=0} | |
2023-05-17 16:30:18,222 INFO timeline.HoodieActiveTimeline: Checking for file exists ?/tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/20230517163012379.deltacommit.requested | |
2023-05-17 16:30:18,229 INFO util.FileIOUtils: Created a new file in meta path: /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/20230517163012379.deltacommit.inflight | |
2023-05-17 16:30:18,646 INFO timeline.HoodieActiveTimeline: Create new file for toInstant ?/tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/20230517163012379.deltacommit.inflight | |
2023-05-17 16:30:18,716 INFO commit.BaseSparkCommitActionExecutor: no validators configured. | |
2023-05-17 16:30:18,716 INFO commit.BaseCommitActionExecutor: Auto commit enabled: Committing 20230517163012379 | |
2023-05-17 16:30:18,740 INFO storage.BlockManagerInfo: Removed broadcast_16_piece0 on ip-172-31-19-77.us-east-2.compute.internal:34001 in memory (size: 216.7 KiB, free: 365.6 MiB) | |
2023-05-17 16:30:18,745 INFO storage.BlockManagerInfo: Removed broadcast_21_piece0 on ip-172-31-19-77.us-east-2.compute.internal:34001 in memory (size: 161.5 KiB, free: 365.8 MiB) | |
2023-05-17 16:30:18,754 INFO storage.BlockManagerInfo: Removed broadcast_20_piece0 on ip-172-31-19-77.us-east-2.compute.internal:34001 in memory (size: 2.6 KiB, free: 365.8 MiB) | |
2023-05-17 16:30:18,763 INFO storage.BlockManagerInfo: Removed broadcast_19_piece0 on ip-172-31-19-77.us-east-2.compute.internal:34001 in memory (size: 5.2 KiB, free: 365.8 MiB) | |
2023-05-17 16:30:18,768 INFO storage.BlockManagerInfo: Removed broadcast_17_piece0 on ip-172-31-19-77.us-east-2.compute.internal:34001 in memory (size: 216.8 KiB, free: 366.0 MiB) | |
2023-05-17 16:30:18,785 INFO storage.BlockManagerInfo: Removed broadcast_18_piece0 on ip-172-31-19-77.us-east-2.compute.internal:34001 in memory (size: 48.2 KiB, free: 366.0 MiB) | |
2023-05-17 16:30:18,814 INFO spark.SparkContext: Starting job: collect at HoodieJavaRDD.java:163 | |
2023-05-17 16:30:18,816 INFO scheduler.DAGScheduler: Registering RDD 70 (mapToPair at HoodieJavaRDD.java:135) as input to shuffle 11 | |
2023-05-17 16:30:18,816 INFO scheduler.DAGScheduler: Got job 18 (collect at HoodieJavaRDD.java:163) with 1 output partitions | |
2023-05-17 16:30:18,816 INFO scheduler.DAGScheduler: Final stage: ResultStage 40 (collect at HoodieJavaRDD.java:163) | |
2023-05-17 16:30:18,816 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 39) | |
2023-05-17 16:30:18,817 INFO scheduler.DAGScheduler: Missing parents: List(ShuffleMapStage 39) | |
2023-05-17 16:30:18,819 INFO scheduler.DAGScheduler: Submitting ShuffleMapStage 39 (MapPartitionsRDD[70] at mapToPair at HoodieJavaRDD.java:135), which has no missing parents | |
2023-05-17 16:30:18,860 INFO memory.MemoryStore: Block broadcast_22 stored as values in memory (estimated size 436.4 KiB, free 364.6 MiB) | |
2023-05-17 16:30:18,863 INFO memory.MemoryStore: Block broadcast_22_piece0 stored as bytes in memory (estimated size 163.1 KiB, free 364.5 MiB) | |
2023-05-17 16:30:18,863 INFO storage.BlockManagerInfo: Added broadcast_22_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 163.1 KiB, free: 365.9 MiB) | |
2023-05-17 16:30:18,863 INFO spark.SparkContext: Created broadcast 22 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:18,864 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ShuffleMapStage 39 (MapPartitionsRDD[70] at mapToPair at HoodieJavaRDD.java:135) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:30:18,864 INFO scheduler.TaskSchedulerImpl: Adding task set 39.0 with 1 tasks resource profile 0 | |
2023-05-17 16:30:18,865 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 39.0 (TID 30) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4687 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:18,865 INFO executor.Executor: Running task 0.0 in stage 39.0 (TID 30) | |
2023-05-17 16:30:18,887 INFO storage.BlockManager: Found block rdd_64_0 locally | |
2023-05-17 16:30:18,892 INFO executor.Executor: Finished task 0.0 in stage 39.0 (TID 30). 1072 bytes result sent to driver | |
2023-05-17 16:30:18,893 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 39.0 (TID 30) in 28 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:30:18,893 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 39.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:18,894 INFO scheduler.DAGScheduler: ShuffleMapStage 39 (mapToPair at HoodieJavaRDD.java:135) finished in 0.073 s | |
2023-05-17 16:30:18,894 INFO scheduler.DAGScheduler: looking for newly runnable stages | |
2023-05-17 16:30:18,894 INFO scheduler.DAGScheduler: running: Set() | |
2023-05-17 16:30:18,894 INFO scheduler.DAGScheduler: waiting: Set(ResultStage 40) | |
2023-05-17 16:30:18,894 INFO scheduler.DAGScheduler: failed: Set() | |
2023-05-17 16:30:18,895 INFO scheduler.DAGScheduler: Submitting ResultStage 40 (MapPartitionsRDD[75] at map at HoodieJavaRDD.java:111), which has no missing parents | |
2023-05-17 16:30:18,940 INFO memory.MemoryStore: Block broadcast_23 stored as values in memory (estimated size 566.5 KiB, free 363.9 MiB) | |
2023-05-17 16:30:18,943 INFO memory.MemoryStore: Block broadcast_23_piece0 stored as bytes in memory (estimated size 214.1 KiB, free 363.7 MiB) | |
2023-05-17 16:30:18,943 INFO storage.BlockManagerInfo: Added broadcast_23_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 214.1 KiB, free: 365.7 MiB) | |
2023-05-17 16:30:18,943 INFO spark.SparkContext: Created broadcast 23 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:18,944 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 40 (MapPartitionsRDD[75] at map at HoodieJavaRDD.java:111) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:30:18,944 INFO scheduler.TaskSchedulerImpl: Adding task set 40.0 with 1 tasks resource profile 0 | |
2023-05-17 16:30:18,945 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 40.0 (TID 31) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, NODE_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:18,945 INFO executor.Executor: Running task 0.0 in stage 40.0 (TID 31) | |
2023-05-17 16:30:18,975 INFO storage.ShuffleBlockFetcherIterator: Getting 1 (334.0 B) non-empty blocks including 1 (334.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:30:18,975 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:30:18,992 INFO deltacommit.BaseSparkDeltaCommitActionExecutor: Merging updates for commit 20230517163012379 for file files-0000 | |
2023-05-17 16:30:19,008 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:30:19,008 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:30:19,008 INFO view.FileSystemViewManager: Creating InMemory based view for basePath /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:19,009 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:19,009 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:19,009 INFO view.AbstractTableFileSystemView: Building file system view for partition (files) | |
2023-05-17 16:30:19,011 INFO view.AbstractTableFileSystemView: addFilesToView: NumFiles=1, NumFileGroups=1, FileGroupsCreationTime=1, StoreTimeTaken=0 | |
# WARNING: Unable to attach Serviceability Agent. Unable to attach even with module exceptions: [org.apache.hudi.org.openjdk.jol.vm.sa.SASupportException: Sense failed., org.apache.hudi.org.openjdk.jol.vm.sa.SASupportException: Sense failed., org.apache.hudi.org.openjdk.jol.vm.sa.SASupportException: Sense failed.] | |
2023-05-17 16:30:20,381 INFO marker.DirectWriteMarkers: Creating Marker Path=/tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/.temp/20230517163012379/files/files-0000_0-40-31_00000000000000.hfile.marker.APPEND | |
2023-05-17 16:30:20,387 INFO marker.DirectWriteMarkers: [direct] Created marker file /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/.temp/20230517163012379/files/files-0000_0-40-31_00000000000000.hfile.marker.APPEND in 9 ms | |
2023-05-17 16:30:20,387 INFO log.HoodieLogFormat$WriterBuilder: Building HoodieLogFormat Writer | |
2023-05-17 16:30:20,388 INFO log.HoodieLogFormat$WriterBuilder: HoodieLogFile on path /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0 | |
2023-05-17 16:30:20,469 INFO log.HoodieLogFormatWriter: HoodieLogFile{pathStr='/tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0', fileLen=124} exists. Appending to existing file | |
2023-05-17 16:30:20,693 INFO impl.MetricsConfig: Loaded properties from hadoop-metrics2.properties | |
2023-05-17 16:30:20,706 INFO impl.MetricsSystemImpl: Scheduled Metric snapshot period at 300 second(s). | |
2023-05-17 16:30:20,706 INFO impl.MetricsSystemImpl: HBase metrics system started | |
2023-05-17 16:30:20,731 INFO metrics.MetricRegistries: Loaded MetricRegistries class org.apache.hudi.org.apache.hadoop.hbase.metrics.impl.MetricRegistriesImpl | |
2023-05-17 16:30:20,768 INFO compress.CodecPool: Got brand-new compressor [.gz] | |
2023-05-17 16:30:20,771 INFO compress.CodecPool: Got brand-new compressor [.gz] | |
2023-05-17 16:30:20,892 INFO storage.BlockManagerInfo: Removed broadcast_22_piece0 on ip-172-31-19-77.us-east-2.compute.internal:34001 in memory (size: 163.1 KiB, free: 365.8 MiB) | |
2023-05-17 16:30:20,935 INFO io.HoodieAppendHandle: AppendHandle for partitionPath files filePath files/.files-0000_00000000000000.log.1_0-0-0, took 1930 ms. | |
2023-05-17 16:30:20,943 INFO memory.MemoryStore: Block rdd_74_0 stored as values in memory (estimated size 381.0 B, free 364.3 MiB) | |
2023-05-17 16:30:20,943 INFO storage.BlockManagerInfo: Added rdd_74_0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 381.0 B, free: 365.8 MiB) | |
2023-05-17 16:30:20,950 INFO executor.Executor: Finished task 0.0 in stage 40.0 (TID 31). 1558 bytes result sent to driver | |
2023-05-17 16:30:20,950 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 40.0 (TID 31) in 2005 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:30:20,951 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 40.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:20,951 INFO scheduler.DAGScheduler: ResultStage 40 (collect at HoodieJavaRDD.java:163) finished in 2.056 s | |
2023-05-17 16:30:20,951 INFO scheduler.DAGScheduler: Job 18 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:30:20,952 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 40: Stage finished | |
2023-05-17 16:30:20,952 INFO scheduler.DAGScheduler: Job 18 finished: collect at HoodieJavaRDD.java:163, took 2.137245 s | |
2023-05-17 16:30:20,952 INFO util.CommitUtils: Creating metadata for UPSERT_PREPPED numWriteStats:1 numReplaceFileIds:0 | |
2023-05-17 16:30:21,059 INFO spark.SparkContext: Starting job: collect at HoodieJavaRDD.java:163 | |
2023-05-17 16:30:21,060 INFO scheduler.DAGScheduler: Got job 19 (collect at HoodieJavaRDD.java:163) with 1 output partitions | |
2023-05-17 16:30:21,060 INFO scheduler.DAGScheduler: Final stage: ResultStage 42 (collect at HoodieJavaRDD.java:163) | |
2023-05-17 16:30:21,060 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 41) | |
2023-05-17 16:30:21,061 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:30:21,061 INFO scheduler.DAGScheduler: Submitting ResultStage 42 (MapPartitionsRDD[76] at map at HoodieJavaRDD.java:111), which has no missing parents | |
2023-05-17 16:30:21,108 INFO memory.MemoryStore: Block broadcast_24 stored as values in memory (estimated size 566.5 KiB, free 363.7 MiB) | |
2023-05-17 16:30:21,111 INFO memory.MemoryStore: Block broadcast_24_piece0 stored as bytes in memory (estimated size 214.1 KiB, free 363.5 MiB) | |
2023-05-17 16:30:21,112 INFO storage.BlockManagerInfo: Added broadcast_24_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 214.1 KiB, free: 365.6 MiB) | |
2023-05-17 16:30:21,112 INFO spark.SparkContext: Created broadcast 24 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:21,113 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 42 (MapPartitionsRDD[76] at map at HoodieJavaRDD.java:111) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:30:21,113 INFO scheduler.TaskSchedulerImpl: Adding task set 42.0 with 1 tasks resource profile 0 | |
2023-05-17 16:30:21,114 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 42.0 (TID 32) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:21,114 INFO executor.Executor: Running task 0.0 in stage 42.0 (TID 32) | |
2023-05-17 16:30:21,141 INFO storage.BlockManager: Found block rdd_74_0 locally | |
2023-05-17 16:30:21,142 INFO executor.Executor: Finished task 0.0 in stage 42.0 (TID 32). 1128 bytes result sent to driver | |
2023-05-17 16:30:21,143 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 42.0 (TID 32) in 29 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:30:21,143 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 42.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:21,144 INFO scheduler.DAGScheduler: ResultStage 42 (collect at HoodieJavaRDD.java:163) finished in 0.082 s | |
2023-05-17 16:30:21,144 INFO scheduler.DAGScheduler: Job 19 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:30:21,144 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 42: Stage finished | |
2023-05-17 16:30:21,144 INFO scheduler.DAGScheduler: Job 19 finished: collect at HoodieJavaRDD.java:163, took 0.085149 s | |
2023-05-17 16:30:21,145 INFO commit.BaseSparkCommitActionExecutor: Committing 20230517163012379, action Type deltacommit, operation Type UPSERT_PREPPED | |
2023-05-17 16:30:21,191 INFO spark.SparkContext: Starting job: collect at HoodieSparkEngineContext.java:137 | |
2023-05-17 16:30:21,191 INFO scheduler.DAGScheduler: Got job 20 (collect at HoodieSparkEngineContext.java:137) with 1 output partitions | |
2023-05-17 16:30:21,191 INFO scheduler.DAGScheduler: Final stage: ResultStage 43 (collect at HoodieSparkEngineContext.java:137) | |
2023-05-17 16:30:21,191 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:30:21,191 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:30:21,192 INFO scheduler.DAGScheduler: Submitting ResultStage 43 (MapPartitionsRDD[78] at flatMap at HoodieSparkEngineContext.java:137), which has no missing parents | |
2023-05-17 16:30:21,203 INFO memory.MemoryStore: Block broadcast_25 stored as values in memory (estimated size 128.1 KiB, free 363.4 MiB) | |
2023-05-17 16:30:21,204 INFO memory.MemoryStore: Block broadcast_25_piece0 stored as bytes in memory (estimated size 48.2 KiB, free 363.4 MiB) | |
2023-05-17 16:30:21,205 INFO storage.BlockManagerInfo: Added broadcast_25_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 48.2 KiB, free: 365.6 MiB) | |
2023-05-17 16:30:21,205 INFO spark.SparkContext: Created broadcast 25 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:21,205 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 43 (MapPartitionsRDD[78] at flatMap at HoodieSparkEngineContext.java:137) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:30:21,206 INFO scheduler.TaskSchedulerImpl: Adding task set 43.0 with 1 tasks resource profile 0 | |
2023-05-17 16:30:21,207 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 43.0 (TID 33) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4489 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:21,207 INFO executor.Executor: Running task 0.0 in stage 43.0 (TID 33) | |
2023-05-17 16:30:21,214 INFO executor.Executor: Finished task 0.0 in stage 43.0 (TID 33). 797 bytes result sent to driver | |
2023-05-17 16:30:21,215 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 43.0 (TID 33) in 9 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:30:21,215 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 43.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:21,216 INFO scheduler.DAGScheduler: ResultStage 43 (collect at HoodieSparkEngineContext.java:137) finished in 0.024 s | |
2023-05-17 16:30:21,216 INFO scheduler.DAGScheduler: Job 20 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:30:21,216 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 43: Stage finished | |
2023-05-17 16:30:21,216 INFO scheduler.DAGScheduler: Job 20 finished: collect at HoodieSparkEngineContext.java:137, took 0.025482 s | |
2023-05-17 16:30:21,225 INFO timeline.HoodieActiveTimeline: Marking instant complete [==>20230517163012379__deltacommit__INFLIGHT] | |
2023-05-17 16:30:21,225 INFO timeline.HoodieActiveTimeline: Checking for file exists ?/tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/20230517163012379.deltacommit.inflight | |
2023-05-17 16:30:21,642 INFO timeline.HoodieActiveTimeline: Create new file for toInstant ?/tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/20230517163012379.deltacommit | |
2023-05-17 16:30:21,642 INFO timeline.HoodieActiveTimeline: Completed [==>20230517163012379__deltacommit__INFLIGHT] | |
2023-05-17 16:30:21,642 INFO commit.BaseSparkCommitActionExecutor: Committed 20230517163012379 | |
2023-05-17 16:30:21,681 INFO spark.SparkContext: Starting job: collectAsMap at HoodieSparkEngineContext.java:151 | |
2023-05-17 16:30:21,682 INFO scheduler.DAGScheduler: Got job 21 (collectAsMap at HoodieSparkEngineContext.java:151) with 1 output partitions | |
2023-05-17 16:30:21,682 INFO scheduler.DAGScheduler: Final stage: ResultStage 44 (collectAsMap at HoodieSparkEngineContext.java:151) | |
2023-05-17 16:30:21,682 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:30:21,682 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:30:21,682 INFO scheduler.DAGScheduler: Submitting ResultStage 44 (MapPartitionsRDD[80] at mapToPair at HoodieSparkEngineContext.java:148), which has no missing parents | |
2023-05-17 16:30:21,692 INFO memory.MemoryStore: Block broadcast_26 stored as values in memory (estimated size 128.2 KiB, free 363.2 MiB) | |
2023-05-17 16:30:21,694 INFO memory.MemoryStore: Block broadcast_26_piece0 stored as bytes in memory (estimated size 48.3 KiB, free 363.2 MiB) | |
2023-05-17 16:30:21,695 INFO storage.BlockManagerInfo: Added broadcast_26_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 48.3 KiB, free: 365.5 MiB) | |
2023-05-17 16:30:21,695 INFO spark.SparkContext: Created broadcast 26 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:21,695 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 44 (MapPartitionsRDD[80] at mapToPair at HoodieSparkEngineContext.java:148) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:30:21,695 INFO scheduler.TaskSchedulerImpl: Adding task set 44.0 with 1 tasks resource profile 0 | |
2023-05-17 16:30:21,696 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 44.0 (TID 34) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4489 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:21,697 INFO executor.Executor: Running task 0.0 in stage 44.0 (TID 34) | |
2023-05-17 16:30:21,705 INFO executor.Executor: Finished task 0.0 in stage 44.0 (TID 34). 958 bytes result sent to driver | |
2023-05-17 16:30:21,706 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 44.0 (TID 34) in 10 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:30:21,706 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 44.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:21,706 INFO scheduler.DAGScheduler: ResultStage 44 (collectAsMap at HoodieSparkEngineContext.java:151) finished in 0.023 s | |
2023-05-17 16:30:21,706 INFO scheduler.DAGScheduler: Job 21 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:30:21,706 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 44: Stage finished | |
2023-05-17 16:30:21,706 INFO scheduler.DAGScheduler: Job 21 finished: collectAsMap at HoodieSparkEngineContext.java:151, took 0.024852 s | |
2023-05-17 16:30:21,709 INFO fs.FSUtils: Removed directory at /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/.temp/20230517163012379 | |
2023-05-17 16:30:21,709 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:21,710 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:21,712 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:21,712 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:21,713 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__deltacommit__COMPLETED]} | |
2023-05-17 16:30:21,714 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:30:21,714 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:30:21,789 INFO spark.SparkContext: Starting job: collect at SparkHoodieBackedTableMetadataWriter.java:185 | |
2023-05-17 16:30:21,790 INFO scheduler.DAGScheduler: Got job 22 (collect at SparkHoodieBackedTableMetadataWriter.java:185) with 1 output partitions | |
2023-05-17 16:30:21,790 INFO scheduler.DAGScheduler: Final stage: ResultStage 46 (collect at SparkHoodieBackedTableMetadataWriter.java:185) | |
2023-05-17 16:30:21,790 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 45) | |
2023-05-17 16:30:21,790 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:30:21,790 INFO scheduler.DAGScheduler: Submitting ResultStage 46 (MapPartitionsRDD[74] at flatMap at BaseSparkCommitActionExecutor.java:255), which has no missing parents | |
2023-05-17 16:30:21,826 INFO memory.MemoryStore: Block broadcast_27 stored as values in memory (estimated size 566.1 KiB, free 362.6 MiB) | |
2023-05-17 16:30:21,830 INFO memory.MemoryStore: Block broadcast_27_piece0 stored as bytes in memory (estimated size 213.9 KiB, free 362.4 MiB) | |
2023-05-17 16:30:21,830 INFO storage.BlockManagerInfo: Added broadcast_27_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 213.9 KiB, free: 365.3 MiB) | |
2023-05-17 16:30:21,830 INFO spark.SparkContext: Created broadcast 27 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:21,831 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 46 (MapPartitionsRDD[74] at flatMap at BaseSparkCommitActionExecutor.java:255) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:30:21,831 INFO scheduler.TaskSchedulerImpl: Adding task set 46.0 with 1 tasks resource profile 0 | |
2023-05-17 16:30:21,832 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 46.0 (TID 35) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:21,832 INFO executor.Executor: Running task 0.0 in stage 46.0 (TID 35) | |
2023-05-17 16:30:21,849 INFO storage.BlockManager: Found block rdd_74_0 locally | |
2023-05-17 16:30:21,849 INFO executor.Executor: Finished task 0.0 in stage 46.0 (TID 35). 1224 bytes result sent to driver | |
2023-05-17 16:30:21,850 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 46.0 (TID 35) in 18 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:30:21,850 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 46.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:21,851 INFO scheduler.DAGScheduler: ResultStage 46 (collect at SparkHoodieBackedTableMetadataWriter.java:185) finished in 0.059 s | |
2023-05-17 16:30:21,851 INFO scheduler.DAGScheduler: Job 22 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:30:21,851 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 46: Stage finished | |
2023-05-17 16:30:21,851 INFO scheduler.DAGScheduler: Job 22 finished: collect at SparkHoodieBackedTableMetadataWriter.java:185, took 0.062600 s | |
2023-05-17 16:30:21,853 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__deltacommit__COMPLETED]} | |
2023-05-17 16:30:21,854 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__deltacommit__COMPLETED]} | |
2023-05-17 16:30:21,855 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:21,856 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:21,858 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:21,858 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:21,859 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__deltacommit__COMPLETED]} | |
2023-05-17 16:30:21,859 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:30:21,859 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:30:21,859 INFO client.BaseHoodieWriteClient: Cleaner started | |
2023-05-17 16:30:21,859 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:21,860 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:21,862 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:21,862 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:21,863 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__deltacommit__COMPLETED]} | |
2023-05-17 16:30:21,863 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:30:21,863 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:30:21,864 INFO client.BaseHoodieWriteClient: Scheduling cleaning at instant time :20230517163012379002 | |
2023-05-17 16:30:21,870 INFO view.FileSystemViewManager: Creating InMemory based view for basePath /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:21,870 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:21,871 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:21,874 INFO clean.CleanPlanner: No earliest commit to retain. No need to scan partitions !! | |
2023-05-17 16:30:21,874 INFO clean.CleanPlanner: Nothing to clean here. It is already clean | |
2023-05-17 16:30:21,883 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__deltacommit__COMPLETED]} | |
2023-05-17 16:30:21,886 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:21,887 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:21,889 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:21,889 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:21,890 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__deltacommit__COMPLETED]} | |
2023-05-17 16:30:21,891 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:30:21,891 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:30:21,892 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:21,893 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:21,894 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:21,894 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:21,896 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__deltacommit__COMPLETED]} | |
2023-05-17 16:30:21,897 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:30:21,897 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:30:21,907 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__deltacommit__COMPLETED]} | |
2023-05-17 16:30:21,908 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:21,909 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:21,911 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:21,912 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517163012379__commit__INFLIGHT]} | |
2023-05-17 16:30:21,914 INFO client.HoodieTimelineArchiver: No Instants to archive | |
2023-05-17 16:30:21,915 INFO timeline.HoodieActiveTimeline: Marking instant complete [==>20230517163012379__commit__INFLIGHT] | |
2023-05-17 16:30:21,915 INFO timeline.HoodieActiveTimeline: Checking for file exists ?/tmp/deltastreamertest/stocks20230517t163003/.hoodie/20230517163012379.inflight | |
2023-05-17 16:30:22,331 INFO timeline.HoodieActiveTimeline: Create new file for toInstant ?/tmp/deltastreamertest/stocks20230517t163003/.hoodie/20230517163012379.commit | |
2023-05-17 16:30:22,331 INFO timeline.HoodieActiveTimeline: Completed [==>20230517163012379__commit__INFLIGHT] | |
2023-05-17 16:30:22,332 WARN marker.WriteMarkersFactory: Timeline-server-based markers are not supported for HDFS: base path /tmp/deltastreamertest/stocks20230517t163003. Falling back to direct markers. | |
2023-05-17 16:30:22,369 INFO spark.SparkContext: Starting job: collectAsMap at HoodieSparkEngineContext.java:151 | |
2023-05-17 16:30:22,370 INFO scheduler.DAGScheduler: Got job 23 (collectAsMap at HoodieSparkEngineContext.java:151) with 1 output partitions | |
2023-05-17 16:30:22,370 INFO scheduler.DAGScheduler: Final stage: ResultStage 47 (collectAsMap at HoodieSparkEngineContext.java:151) | |
2023-05-17 16:30:22,370 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:30:22,370 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:30:22,371 INFO scheduler.DAGScheduler: Submitting ResultStage 47 (MapPartitionsRDD[82] at mapToPair at HoodieSparkEngineContext.java:148), which has no missing parents | |
2023-05-17 16:30:22,381 INFO memory.MemoryStore: Block broadcast_28 stored as values in memory (estimated size 128.2 KiB, free 362.3 MiB) | |
2023-05-17 16:30:22,383 INFO memory.MemoryStore: Block broadcast_28_piece0 stored as bytes in memory (estimated size 48.3 KiB, free 362.3 MiB) | |
2023-05-17 16:30:22,383 INFO storage.BlockManagerInfo: Added broadcast_28_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:34001 (size: 48.3 KiB, free: 365.3 MiB) | |
2023-05-17 16:30:22,384 INFO spark.SparkContext: Created broadcast 28 from broadcast at DAGScheduler.scala:1433 | |
2023-05-17 16:30:22,384 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 47 (MapPartitionsRDD[82] at mapToPair at HoodieSparkEngineContext.java:148) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:30:22,384 INFO scheduler.TaskSchedulerImpl: Adding task set 47.0 with 1 tasks resource profile 0 | |
2023-05-17 16:30:22,385 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 47.0 (TID 36) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4471 bytes) taskResourceAssignments Map() | |
2023-05-17 16:30:22,386 INFO executor.Executor: Running task 0.0 in stage 47.0 (TID 36) | |
2023-05-17 16:30:22,395 INFO executor.Executor: Finished task 0.0 in stage 47.0 (TID 36). 940 bytes result sent to driver | |
2023-05-17 16:30:22,396 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 47.0 (TID 36) in 11 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:30:22,396 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 47.0, whose tasks have all completed, from pool | |
2023-05-17 16:30:22,396 INFO scheduler.DAGScheduler: ResultStage 47 (collectAsMap at HoodieSparkEngineContext.java:151) finished in 0.025 s | |
2023-05-17 16:30:22,397 INFO scheduler.DAGScheduler: Job 23 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:30:22,397 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 47: Stage finished | |
2023-05-17 16:30:22,397 INFO scheduler.DAGScheduler: Job 23 finished: collectAsMap at HoodieSparkEngineContext.java:151, took 0.027350 s | |
2023-05-17 16:30:22,399 INFO fs.FSUtils: Removed directory at /tmp/deltastreamertest/stocks20230517t163003/.hoodie/.temp/20230517163012379 | |
2023-05-17 16:30:22,399 INFO client.BaseHoodieWriteClient: Committed 20230517163012379 | |
2023-05-17 16:30:22,403 INFO rdd.MapPartitionsRDD: Removing RDD 74 from persistence list | |
2023-05-17 16:30:22,404 INFO rdd.MapPartitionsRDD: Removing RDD 53 from persistence list | |
2023-05-17 16:30:22,405 INFO rdd.UnionRDD: Removing RDD 64 from persistence list | |
2023-05-17 16:30:22,405 INFO storage.BlockManager: Removing RDD 53 | |
2023-05-17 16:30:22,405 INFO rdd.MapPartitionsRDD: Removing RDD 43 from persistence list | |
2023-05-17 16:30:22,406 INFO client.BaseHoodieWriteClient: Start to clean synchronously. | |
2023-05-17 16:30:22,406 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:22,406 INFO storage.BlockManager: Removing RDD 74 | |
2023-05-17 16:30:22,407 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:22,409 INFO storage.BlockManager: Removing RDD 64 | |
2023-05-17 16:30:22,409 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:22,409 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:22,413 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__commit__COMPLETED]} | |
2023-05-17 16:30:22,413 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:22,414 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:22,418 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:22,418 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:22,419 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:22,421 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:22,422 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__deltacommit__COMPLETED]} | |
2023-05-17 16:30:22,422 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:22,422 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:22,422 INFO view.FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
2023-05-17 16:30:22,422 INFO view.FileSystemViewManager: Creating remote first table view | |
2023-05-17 16:30:22,422 INFO client.BaseHoodieWriteClient: Cleaner started | |
2023-05-17 16:30:22,422 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:22,423 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:22,423 INFO storage.BlockManager: Removing RDD 43 | |
2023-05-17 16:30:22,425 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:22,425 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:22,426 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__commit__COMPLETED]} | |
2023-05-17 16:30:22,426 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:22,427 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:22,428 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:22,428 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:22,429 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:22,430 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:22,431 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__deltacommit__COMPLETED]} | |
2023-05-17 16:30:22,431 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:22,432 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:22,432 INFO view.FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
2023-05-17 16:30:22,432 INFO view.FileSystemViewManager: Creating remote first table view | |
2023-05-17 16:30:22,432 INFO client.BaseHoodieWriteClient: Scheduling cleaning at instant time :20230517163022406 | |
2023-05-17 16:30:22,432 INFO view.FileSystemViewManager: Creating remote view for basePath /tmp/deltastreamertest/stocks20230517t163003. Server=ip-172-31-19-77.us-east-2.compute.internal:39525, Timeout=300 | |
2023-05-17 16:30:22,432 INFO view.FileSystemViewManager: Creating InMemory based view for basePath /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:22,432 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:22,433 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:22,445 INFO view.RemoteHoodieTableFileSystemView: Sending request : (http://ip-172-31-19-77.us-east-2.compute.internal:39525/v1/hoodie/view/compactions/pending/?basepath=%2Ftmp%2Fdeltastreamertest%2Fstocks20230517t163003&lastinstantts=20230517163012379&timelinehash=e9155df6d87cb9fa4e613d9556bf65f21db88c2ca14d3f47aca2cb6cc2b5cc39) | |
2023-05-17 16:30:22,787 INFO storage.BlockManagerInfo: Removed broadcast_26_piece0 on ip-172-31-19-77.us-east-2.compute.internal:34001 in memory (size: 48.3 KiB, free: 365.3 MiB) | |
2023-05-17 16:30:22,790 INFO storage.BlockManagerInfo: Removed broadcast_28_piece0 on ip-172-31-19-77.us-east-2.compute.internal:34001 in memory (size: 48.3 KiB, free: 365.4 MiB) | |
2023-05-17 16:30:22,798 INFO storage.BlockManagerInfo: Removed broadcast_27_piece0 on ip-172-31-19-77.us-east-2.compute.internal:34001 in memory (size: 213.9 KiB, free: 365.6 MiB) | |
2023-05-17 16:30:22,800 INFO storage.BlockManagerInfo: Removed broadcast_25_piece0 on ip-172-31-19-77.us-east-2.compute.internal:34001 in memory (size: 48.2 KiB, free: 365.6 MiB) | |
2023-05-17 16:30:22,806 INFO storage.BlockManagerInfo: Removed broadcast_24_piece0 on ip-172-31-19-77.us-east-2.compute.internal:34001 in memory (size: 214.1 KiB, free: 365.8 MiB) | |
2023-05-17 16:30:22,946 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:22,948 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:22,951 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:22,951 INFO view.FileSystemViewManager: Creating InMemory based view for basePath /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:22,952 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__commit__COMPLETED]} | |
2023-05-17 16:30:22,953 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:22,954 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:22,956 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:22,956 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:22,957 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:22,958 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:22,960 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__deltacommit__COMPLETED]} | |
2023-05-17 16:30:22,960 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:22,960 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:22,960 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:22,961 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:23,016 INFO view.RemoteHoodieTableFileSystemView: Sending request : (http://ip-172-31-19-77.us-east-2.compute.internal:39525/v1/hoodie/view/logcompactions/pending/?basepath=%2Ftmp%2Fdeltastreamertest%2Fstocks20230517t163003&lastinstantts=20230517163012379&timelinehash=e9155df6d87cb9fa4e613d9556bf65f21db88c2ca14d3f47aca2cb6cc2b5cc39) | |
2023-05-17 16:30:23,021 INFO clean.CleanPlanner: No earliest commit to retain. No need to scan partitions !! | |
2023-05-17 16:30:23,021 INFO clean.CleanPlanner: Nothing to clean here. It is already clean | |
2023-05-17 16:30:23,023 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__commit__COMPLETED]} | |
2023-05-17 16:30:23,024 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:23,025 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:23,027 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:23,027 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:23,027 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__commit__COMPLETED]} | |
2023-05-17 16:30:23,028 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:23,029 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:23,030 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:23,030 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:23,031 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:23,033 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:23,035 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__deltacommit__COMPLETED]} | |
2023-05-17 16:30:23,035 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:23,036 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:23,036 INFO view.FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
2023-05-17 16:30:23,036 INFO view.FileSystemViewManager: Creating remote first table view | |
2023-05-17 16:30:23,036 INFO client.BaseHoodieWriteClient: Start to archive synchronously. | |
2023-05-17 16:30:23,038 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__commit__COMPLETED]} | |
2023-05-17 16:30:23,038 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:23,039 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:23,041 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:23,041 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:23,042 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:23,047 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:23,048 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__deltacommit__COMPLETED]} | |
2023-05-17 16:30:23,048 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:23,049 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:23,049 INFO client.HoodieTimelineArchiver: Not archiving as there is no compaction yet on the metadata table | |
2023-05-17 16:30:23,049 INFO client.HoodieTimelineArchiver: No Instants to archive | |
2023-05-17 16:30:23,049 INFO view.FileSystemViewManager: Creating remote view for basePath /tmp/deltastreamertest/stocks20230517t163003. Server=ip-172-31-19-77.us-east-2.compute.internal:39525, Timeout=300 | |
2023-05-17 16:30:23,049 INFO view.FileSystemViewManager: Creating InMemory based view for basePath /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:23,049 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:23,050 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:23,054 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__commit__COMPLETED]} | |
2023-05-17 16:30:23,055 INFO view.RemoteHoodieTableFileSystemView: Sending request : (http://ip-172-31-19-77.us-east-2.compute.internal:39525/v1/hoodie/view/refresh/?basepath=%2Ftmp%2Fdeltastreamertest%2Fstocks20230517t163003&lastinstantts=20230517163012379&timelinehash=e9155df6d87cb9fa4e613d9556bf65f21db88c2ca14d3f47aca2cb6cc2b5cc39) | |
2023-05-17 16:30:23,060 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__commit__COMPLETED]} | |
2023-05-17 16:30:23,061 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:23,064 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:23,065 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__commit__COMPLETED]} | |
2023-05-17 16:30:23,066 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__deltacommit__COMPLETED]} | |
2023-05-17 16:30:23,066 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:23,066 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:23,066 INFO deltastreamer.DeltaSync: Commit 20230517163012379 successful! | |
2023-05-17 16:30:23,135 INFO conf.HiveConf: Found configuration file file:/home/hadoop/spark-3.1.3-bin-hadoop3.2/conf/hive-site.xml | |
2023-05-17 16:30:23,281 WARN conf.HiveConf: HiveConf of name hive.server2.thrift.url does not exist | |
2023-05-17 16:30:23,288 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:23,290 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:23,292 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:23,292 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:23,293 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__commit__COMPLETED]} | |
2023-05-17 16:30:23,623 INFO hive.metastore: Trying to connect to metastore with URI thrift://ip-172-31-19-77.us-east-2.compute.internal:9083 | |
2023-05-17 16:30:23,638 INFO hive.metastore: Opened a connection to metastore, current connections: 1 | |
2023-05-17 16:30:23,656 INFO hive.metastore: Connected to metastore. | |
2023-05-17 16:30:23,730 INFO hive.HiveSyncTool: Syncing target hoodie table with hive table(default.stocks20230517t163003). Hive metastore URL from HiveConf:thrift://ip-172-31-19-77.us-east-2.compute.internal:9083). Hive metastore URL from HiveSyncConfig:null, basePath :/tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:23,730 INFO hive.HiveSyncTool: Trying to sync hoodie table stocks20230517t163003 with base path /tmp/deltastreamertest/stocks20230517t163003 of type COPY_ON_WRITE | |
2023-05-17 16:30:23,756 INFO table.TableSchemaResolver: Reading schema from /tmp/deltastreamertest/stocks20230517t163003/2018/08/31/01f08b83-1a13-4b4f-a3b7-5ddd080217ea-0_0-22-23_20230517163012379.parquet | |
2023-05-17 16:30:23,788 INFO hive.HiveSyncTool: Hive table stocks20230517t163003 is not found. Creating it with schema message stock_ticks { | |
optional binary _hoodie_commit_time (UTF8); | |
optional binary _hoodie_commit_seqno (UTF8); | |
optional binary _hoodie_record_key (UTF8); | |
optional binary _hoodie_partition_path (UTF8); | |
optional binary _hoodie_file_name (UTF8); | |
required int64 volume; | |
required binary ts (UTF8); | |
required binary symbol (UTF8); | |
required int32 year; | |
required binary month (UTF8); | |
required double high; | |
required double low; | |
required binary key (UTF8); | |
required binary date (UTF8); | |
required double close; | |
required double open; | |
required binary day (UTF8); | |
} | |
2023-05-17 16:30:23,874 INFO hive.HoodieHiveSyncClient: No comment difference of stocks20230517t163003 | |
2023-05-17 16:30:23,874 INFO hive.HiveSyncTool: Schema sync complete. Syncing partitions for stocks20230517t163003 | |
2023-05-17 16:30:23,874 INFO hive.HiveSyncTool: Last commit time synced was found to be null | |
2023-05-17 16:30:23,874 INFO hive.HiveSyncTool: Sync all partitions given the last commit time synced is empty or before the start of the active timeline. Listing all partitions in /tmp/deltastreamertest/stocks20230517t163003, file system: DFS[DFSClient[clientName=DFSClient_NONMAPREDUCE_-347453190_1, ugi=hadoop (auth:SIMPLE)]] | |
2023-05-17 16:30:23,891 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:23,893 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/hoodie.properties | |
2023-05-17 16:30:23,895 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t163003 | |
2023-05-17 16:30:23,895 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:23,896 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:23,897 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:23,898 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__deltacommit__COMPLETED]} | |
2023-05-17 16:30:23,899 INFO view.AbstractTableFileSystemView: Took 1 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:23,899 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:23,899 INFO metadata.HoodieTableMetadataUtil: Loading latest merged file slices for metadata table partition files | |
2023-05-17 16:30:23,904 INFO view.AbstractTableFileSystemView: Took 5 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:30:23,905 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:30:23,905 INFO view.AbstractTableFileSystemView: Building file system view for partition (files) | |
2023-05-17 16:30:23,907 INFO view.AbstractTableFileSystemView: addFilesToView: NumFiles=2, NumFileGroups=1, FileGroupsCreationTime=0, StoreTimeTaken=0 | |
2023-05-17 16:30:23,910 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__commit__COMPLETED]} | |
2023-05-17 16:30:23,923 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:23,924 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:30:23,926 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata | |
2023-05-17 16:30:23,934 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517163012379__deltacommit__COMPLETED]} | |
2023-05-17 16:30:23,942 INFO log.AbstractHoodieLogRecordReader: Scanning log file HoodieLogFile{pathStr='hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0', fileLen=-1} | |
2023-05-17 16:30:23,946 INFO log.AbstractHoodieLogRecordReader: Reading a delete block from file hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0 | |
2023-05-17 16:30:23,946 INFO log.AbstractHoodieLogRecordReader: Scanning log file HoodieLogFile{pathStr='hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0', fileLen=-1} | |
2023-05-17 16:30:23,947 INFO log.AbstractHoodieLogRecordReader: Reading a data block from file hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/deltastreamertest/stocks20230517t163003/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0 at instant 20230517163012379 | |
2023-05-17 16:30:23,947 INFO log.AbstractHoodieLogRecordReader: Merging the final data blocks | |
2023-05-17 16:30:23,947 INFO log.AbstractHoodieLogRecordReader: Number of remaining logblocks to merge 2 | |
2023-05-17 16:30:23,948 INFO log.AbstractHoodieLogRecordReader: Number of remaining logblocks to merge 1 | |
2023-05-17 16:30:24,058 INFO compress.CodecPool: Got brand-new decompressor [.gz] | |
2023-05-17 16:30:24,062 INFO compress.CodecPool: Got brand-new decompressor [.gz] | |
2023-05-17 16:30:24,062 INFO compress.CodecPool: Got brand-new decompressor [.gz] | |
2023-05-17 16:30:24,118 INFO compress.CodecPool: Got brand-new decompressor [.gz] | |
2023-05-17 16:30:24,130 INFO log.HoodieMergedLogRecordScanner: Number of log files scanned => 1 | |
2023-05-17 16:30:24,130 INFO log.HoodieMergedLogRecordScanner: MaxMemoryInBytes allowed for compaction => 1073741824 | |
2023-05-17 16:30:24,130 INFO log.HoodieMergedLogRecordScanner: Number of entries in MemoryBasedMap in ExternalSpillableMap => 2 | |
2023-05-17 16:30:24,130 INFO log.HoodieMergedLogRecordScanner: Total size in bytes of MemoryBasedMap in ExternalSpillableMap => 1248 | |
2023-05-17 16:30:24,130 INFO log.HoodieMergedLogRecordScanner: Number of entries in BitCaskDiskMap in ExternalSpillableMap => 0 | |
2023-05-17 16:30:24,130 INFO log.HoodieMergedLogRecordScanner: Size of file spilled to disk => 0 | |
2023-05-17 16:30:24,131 INFO metadata.HoodieBackedTableMetadata: Opened 1 metadata log files (dataset instant=20230517163012379, metadata instant=20230517163012379) in 221 ms | |
2023-05-17 16:30:24,136 INFO metadata.BaseTableMetadata: Listed partitions from metadata: #partitions=1 | |
2023-05-17 16:30:24,139 INFO hive.HiveSyncTool: New Partitions [2018/08/31] | |
2023-05-17 16:30:24,140 INFO ddl.HMSDDLExecutor: Adding partitions 1 to table stocks20230517t163003 | |
2023-05-17 16:30:24,196 INFO ddl.HMSDDLExecutor: HMSDDLExecutor add a batch partitions done: 1 | |
2023-05-17 16:30:24,233 INFO hive.HiveSyncTool: Sync complete for stocks20230517t163003 | |
2023-05-17 16:30:24,237 INFO hive.metastore: Closed a connection to metastore, current connections: 0 | |
2023-05-17 16:30:24,238 INFO deltastreamer.DeltaSync: Shutting down embedded timeline server | |
2023-05-17 16:30:24,238 INFO embedded.EmbeddedTimelineService: Closing Timeline server | |
2023-05-17 16:30:24,238 INFO service.TimelineService: Closing Timeline Service | |
2023-05-17 16:30:24,238 INFO javalin.Javalin: Stopping Javalin ... | |
2023-05-17 16:30:24,251 INFO javalin.Javalin: Javalin has stopped | |
2023-05-17 16:30:24,251 INFO service.TimelineService: Closed Timeline Service | |
2023-05-17 16:30:24,251 INFO embedded.EmbeddedTimelineService: Closed Timeline server | |
2023-05-17 16:30:24,252 INFO deltastreamer.HoodieDeltaStreamer: Shut down delta streamer | |
2023-05-17 16:30:24,260 INFO server.AbstractConnector: Stopped Spark@203dd56b{HTTP/1.1, (http/1.1)}{0.0.0.0:8090} | |
2023-05-17 16:30:24,261 INFO ui.SparkUI: Stopped Spark web UI at http://ip-172-31-19-77.us-east-2.compute.internal:8090 | |
2023-05-17 16:30:24,271 INFO spark.MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped! | |
2023-05-17 16:30:24,285 INFO memory.MemoryStore: MemoryStore cleared | |
2023-05-17 16:30:24,285 INFO storage.BlockManager: BlockManager stopped | |
2023-05-17 16:30:24,289 INFO storage.BlockManagerMaster: BlockManagerMaster stopped | |
2023-05-17 16:30:24,295 INFO scheduler.OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped! | |
2023-05-17 16:30:24,299 INFO spark.SparkContext: Successfully stopped SparkContext | |
2023-05-17 16:30:24,301 INFO util.ShutdownHookManager: Shutdown hook called | |
2023-05-17 16:30:24,301 INFO util.ShutdownHookManager: Deleting directory /mnt/tmp/spark-4bd739f1-bc22-4f6c-a5f3-33bfbc17b2af | |
2023-05-17 16:30:24,304 INFO util.ShutdownHookManager: Deleting directory /mnt/tmp/spark-4c1bf246-f34d-46d9-a57f-3ad6adfa8f06 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment