Last active
May 17, 2023 16:57
-
-
Save ad1happy2go/172c5acc8adf4fb21544de64efb58bf7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Stage the Avro schema and source data from S3 into the cluster's default
# filesystem (HDFS here, per the run logs), then launch a single-round Hudi
# DeltaStreamer ingest into a fresh COPY_ON_WRITE table with Hive sync enabled.
set -euo pipefail

hadoop fs -cp s3://rxusandbox-us-west-2/testcases/stocks/data/schema.avsc /tmp/
# NOTE(review): the source directory is named "source_parquet" but the job
# reads it with JsonDFSSource (and the logs show it splitting batch_1.json) —
# presumably the files are JSON despite the name; confirm and consider renaming.
hadoop fs -cp s3://rxusandbox-us-west-2/testcases/stocks/data/source /tmp/source_parquet

# Timestamp suffix so each run targets a fresh table and base path.
NOW=$(date '+%Y%m%dt%H%M%S')

# NOTE(review): spark-submit is given --master local while the DeltaStreamer
# application argument says --spark-master yarn; the logs show the job actually
# ran locally. Confirm which master is intended and drop the other flag.
bin/spark-submit --master local --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer \
  --jars /home/hadoop/v_3.2/hudi-hive-sync-bundle-0.13.0.jar,/home/hadoop/v_3.2/hudi-spark3.2-bundle_2.12-0.13.0.jar \
  /home/hadoop/v_3.2/hudi-utilities-slim-bundle_2.12-0.13.0.jar \
  --target-base-path "/tmp/deltastreamertest/stocks${NOW}" \
  --target-table "stocks${NOW}" --table-type COPY_ON_WRITE --base-file-format PARQUET \
  --source-class org.apache.hudi.utilities.sources.JsonDFSSource \
  --source-ordering-field ts --payload-class org.apache.hudi.common.model.DefaultHoodieRecordPayload \
  --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider \
  --hoodie-conf hoodie.deltastreamer.schemaprovider.source.schema.file=/tmp/schema.avsc \
  --hoodie-conf hoodie.deltastreamer.schemaprovider.target.schema.file=/tmp/schema.avsc \
  --op UPSERT --enable-sync --spark-master yarn \
  --hoodie-conf hoodie.deltastreamer.source.dfs.root=/tmp/source_parquet \
  --hoodie-conf hoodie.datasource.write.recordkey.field=symbol \
  --hoodie-conf hoodie.datasource.write.partitionpath.field=date --hoodie-conf hoodie.datasource.write.precombine.field=ts \
  --hoodie-conf hoodie.datasource.write.keygenerator.type=SIMPLE --hoodie-conf hoodie.datasource.write.hive_style_partitioning=false \
  --hoodie-conf hoodie.metadata.enable=true \
  --hoodie-conf hoodie.datasource.hive_sync.mode=hms \
  --hoodie-conf hoodie.datasource.hive_sync.skip_ro_suffix=true \
  --hoodie-conf hoodie.datasource.hive_sync.ignore_exceptions=false \
  --hoodie-conf hoodie.datasource.hive_sync.auto_create_database=true \
  --hoodie-conf hoodie.datasource.hive_sync.database=default \
  --hoodie-conf hoodie.datasource.hive_sync.partition_fields=date \
  --hoodie-conf hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor \
  --hoodie-conf hoodie.datasource.hive_sync.sync_as_datasource=true --hoodie-conf hoodie.datasource.hive_sync.sync_comment=true
=========== LOGS (output of the spark-submit run above) ===========
[hadoop@ip-172-31-19-77 spark-3.2.4-bin-hadoop3.2]$ NOW=$(date '+%Y%m%dt%H%M%S') | |
[hadoop@ip-172-31-19-77 spark-3.2.4-bin-hadoop3.2]$ bin/spark-submit --master local --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer \ | |
> --jars /home/hadoop/v_3.2/hudi-hive-sync-bundle-0.13.0.jar,/home/hadoop/v_3.2/hudi-spark3.2-bundle_2.12-0.13.0.jar \ | |
> /home/hadoop/v_3.2/hudi-utilities-slim-bundle_2.12-0.13.0.jar \ | |
> --target-base-path /tmp/deltastreamertest/stocks${NOW} \ | |
> --target-table stocks${NOW} --table-type COPY_ON_WRITE --base-file-format PARQUET \ | |
> --source-class org.apache.hudi.utilities.sources.JsonDFSSource \ | |
> --source-ordering-field ts --payload-class org.apache.hudi.common.model.DefaultHoodieRecordPayload \ | |
> --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider \ | |
> --hoodie-conf hoodie.deltastreamer.schemaprovider.source.schema.file=/tmp/schema.avsc \ | |
> --hoodie-conf hoodie.deltastreamer.schemaprovider.target.schema.file=/tmp/schema.avsc \ | |
> --op UPSERT --enable-sync --spark-master yarn \ | |
> --hoodie-conf hoodie.deltastreamer.source.dfs.root=/tmp/source_parquet \ | |
> --hoodie-conf hoodie.datasource.write.recordkey.field=symbol \ | |
> --hoodie-conf hoodie.datasource.write.partitionpath.field=date --hoodie-conf hoodie.datasource.write.precombine.field=ts \ | |
> --hoodie-conf hoodie.datasource.write.keygenerator.type=SIMPLE --hoodie-conf hoodie.datasource.write.hive_style_partitioning=false \ | |
> --hoodie-conf hoodie.metadata.enable=true \ | |
> --hoodie-conf hoodie.datasource.hive_sync.mode=hms \ | |
> --hoodie-conf hoodie.datasource.hive_sync.skip_ro_suffix=true \ | |
> --hoodie-conf hoodie.datasource.hive_sync.ignore_exceptions=false \ | |
> --hoodie-conf hoodie.datasource.hive_sync.auto_create_database=true \ | |
> --hoodie-conf hoodie.datasource.hive_sync.database=default \ | |
> --hoodie-conf hoodie.datasource.hive_sync.partition_fields=date \ | |
> --hoodie-conf hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor \ | |
> --hoodie-conf hoodie.datasource.hive_sync.sync_as_datasource=true --hoodie-conf hoodie.datasource.hive_sync.sync_comment=true | |
SLF4J: Class path contains multiple SLF4J bindings. | |
SLF4J: Found binding in [jar:file:/home/hadoop/spark-3.2.4-bin-hadoop3.2/jars/slf4j-log4j12-1.7.30.jar!/org/slf4j/impl/StaticLoggerBinder.class] | |
SLF4J: Found binding in [jar:file:/usr/lib/hadoop/lib/slf4j-reload4j-1.7.36.jar!/org/slf4j/impl/StaticLoggerBinder.class] | |
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation. | |
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory] | |
2023-05-17 16:55:56,740 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable | |
2023-05-17 16:55:57,231 WARN deltastreamer.SchedulerConfGenerator: Job Scheduling Configs will not be in effect as spark.scheduler.mode is not set to FAIR at instantiation time. Continuing without scheduling configs | |
2023-05-17 16:55:57,284 INFO spark.SparkContext: Running Spark version 3.2.4 | |
2023-05-17 16:55:57,320 INFO resource.ResourceUtils: ============================================================== | |
2023-05-17 16:55:57,320 INFO resource.ResourceUtils: No custom resources configured for spark.driver. | |
2023-05-17 16:55:57,321 INFO resource.ResourceUtils: ============================================================== | |
2023-05-17 16:55:57,322 INFO spark.SparkContext: Submitted application: delta-streamer-stocks20230517t165531 | |
2023-05-17 16:55:57,354 INFO resource.ResourceProfile: Default ResourceProfile created, executor resources: Map(cores -> name: cores, amount: 1, script: , vendor: , memory -> name: memory, amount: 1024, script: , vendor: , offHeap -> name: offHeap, amount: 0, script: , vendor: ), task resources: Map(cpus -> name: cpus, amount: 1.0) | |
2023-05-17 16:55:57,373 INFO resource.ResourceProfile: Limiting resource is cpu | |
2023-05-17 16:55:57,374 INFO resource.ResourceProfileManager: Added ResourceProfile id: 0 | |
2023-05-17 16:55:57,453 INFO spark.SecurityManager: Changing view acls to: hadoop | |
2023-05-17 16:55:57,454 INFO spark.SecurityManager: Changing modify acls to: hadoop | |
2023-05-17 16:55:57,454 INFO spark.SecurityManager: Changing view acls groups to: | |
2023-05-17 16:55:57,455 INFO spark.SecurityManager: Changing modify acls groups to: | |
2023-05-17 16:55:57,455 INFO spark.SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(hadoop); groups with view permissions: Set(); users with modify permissions: Set(hadoop); groups with modify permissions: Set() | |
2023-05-17 16:55:57,493 INFO Configuration.deprecation: mapred.output.compression.codec is deprecated. Instead, use mapreduce.output.fileoutputformat.compress.codec | |
2023-05-17 16:55:57,493 INFO Configuration.deprecation: mapred.output.compress is deprecated. Instead, use mapreduce.output.fileoutputformat.compress | |
2023-05-17 16:55:57,493 INFO Configuration.deprecation: mapred.output.compression.type is deprecated. Instead, use mapreduce.output.fileoutputformat.compress.type | |
2023-05-17 16:55:57,782 INFO util.Utils: Successfully started service 'sparkDriver' on port 34671. | |
2023-05-17 16:55:57,808 INFO spark.SparkEnv: Registering MapOutputTracker | |
2023-05-17 16:55:57,844 INFO spark.SparkEnv: Registering BlockManagerMaster | |
2023-05-17 16:55:57,882 INFO storage.BlockManagerMasterEndpoint: Using org.apache.spark.storage.DefaultTopologyMapper for getting topology information | |
2023-05-17 16:55:57,882 INFO storage.BlockManagerMasterEndpoint: BlockManagerMasterEndpoint up | |
2023-05-17 16:55:57,886 INFO spark.SparkEnv: Registering BlockManagerMasterHeartbeat | |
2023-05-17 16:55:57,906 INFO storage.DiskBlockManager: Created local directory at /mnt/tmp/blockmgr-573ec1b9-a027-46b7-87c2-9f8f8b529480 | |
2023-05-17 16:55:57,930 INFO memory.MemoryStore: MemoryStore started with capacity 366.3 MiB | |
2023-05-17 16:55:57,946 INFO spark.SparkEnv: Registering OutputCommitCoordinator | |
2023-05-17 16:55:58,035 INFO util.log: Logging initialized @3042ms to org.sparkproject.jetty.util.log.Slf4jLog | |
2023-05-17 16:55:58,103 INFO server.Server: jetty-9.4.44.v20210927; built: 2021-09-27T23:02:44.612Z; git: 8da83308eeca865e495e53ef315a249d63ba9332; jvm 1.8.0_372-b07 | |
2023-05-17 16:55:58,124 INFO server.Server: Started @3133ms | |
2023-05-17 16:55:58,198 INFO server.AbstractConnector: Started ServerConnector@4fe875be{HTTP/1.1, (http/1.1)}{0.0.0.0:8090} | |
2023-05-17 16:55:58,198 INFO util.Utils: Successfully started service 'SparkUI' on port 8090. | |
2023-05-17 16:55:58,227 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@682bd3c4{/jobs,null,AVAILABLE,@Spark} | |
2023-05-17 16:55:58,230 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@7fab4be7{/jobs/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:55:58,231 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@4d74c3ba{/jobs/job,null,AVAILABLE,@Spark} | |
2023-05-17 16:55:58,232 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@49dbaaf3{/jobs/job/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:55:58,233 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@736f3e9e{/stages,null,AVAILABLE,@Spark} | |
2023-05-17 16:55:58,234 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@1f443fae{/stages/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:55:58,235 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@79ab34c1{/stages/stage,null,AVAILABLE,@Spark} | |
2023-05-17 16:55:58,236 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@28d79cba{/stages/stage/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:55:58,237 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@29f0c4f2{/stages/pool,null,AVAILABLE,@Spark} | |
2023-05-17 16:55:58,238 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@6093d508{/stages/pool/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:55:58,239 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@3c7cfcbb{/storage,null,AVAILABLE,@Spark} | |
2023-05-17 16:55:58,239 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@1bfe3203{/storage/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:55:58,240 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@70e3f36f{/storage/rdd,null,AVAILABLE,@Spark} | |
2023-05-17 16:55:58,241 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@23e44287{/storage/rdd/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:55:58,242 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@6daf2337{/environment,null,AVAILABLE,@Spark} | |
2023-05-17 16:55:58,243 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@250b236d{/environment/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:55:58,244 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@7551da2a{/executors,null,AVAILABLE,@Spark} | |
2023-05-17 16:55:58,244 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@78de58ea{/executors/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:55:58,245 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@7d755813{/executors/threadDump,null,AVAILABLE,@Spark} | |
2023-05-17 16:55:58,246 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@63da207f{/executors/threadDump/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:55:58,254 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@40d10481{/static,null,AVAILABLE,@Spark} | |
2023-05-17 16:55:58,255 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@6e1f8469{/,null,AVAILABLE,@Spark} | |
2023-05-17 16:55:58,256 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@3b6c624{/api,null,AVAILABLE,@Spark} | |
2023-05-17 16:55:58,257 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@31ee96f4{/jobs/job/kill,null,AVAILABLE,@Spark} | |
2023-05-17 16:55:58,258 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@652ab8d9{/stages/stage/kill,null,AVAILABLE,@Spark} | |
2023-05-17 16:55:58,259 INFO ui.SparkUI: Bound SparkUI to 0.0.0.0, and started at http://ip-172-31-19-77.us-east-2.compute.internal:8090 | |
2023-05-17 16:55:58,278 INFO spark.SparkContext: Added JAR file:///home/hadoop/v_3.2/hudi-hive-sync-bundle-0.13.0.jar at spark://ip-172-31-19-77.us-east-2.compute.internal:34671/jars/hudi-hive-sync-bundle-0.13.0.jar with timestamp 1684342557274 | |
2023-05-17 16:55:58,278 INFO spark.SparkContext: Added JAR file:///home/hadoop/v_3.2/hudi-spark3.2-bundle_2.12-0.13.0.jar at spark://ip-172-31-19-77.us-east-2.compute.internal:34671/jars/hudi-spark3.2-bundle_2.12-0.13.0.jar with timestamp 1684342557274 | |
2023-05-17 16:55:58,278 INFO spark.SparkContext: Added JAR file:/home/hadoop/v_3.2/hudi-utilities-slim-bundle_2.12-0.13.0.jar at spark://ip-172-31-19-77.us-east-2.compute.internal:34671/jars/hudi-utilities-slim-bundle_2.12-0.13.0.jar with timestamp 1684342557274 | |
2023-05-17 16:55:58,439 INFO executor.Executor: Starting executor ID driver on host ip-172-31-19-77.us-east-2.compute.internal | |
2023-05-17 16:55:58,458 INFO executor.Executor: Fetching spark://ip-172-31-19-77.us-east-2.compute.internal:34671/jars/hudi-hive-sync-bundle-0.13.0.jar with timestamp 1684342557274 | |
2023-05-17 16:55:58,519 INFO client.TransportClientFactory: Successfully created connection to ip-172-31-19-77.us-east-2.compute.internal/172.31.19.77:34671 after 30 ms (0 ms spent in bootstraps) | |
2023-05-17 16:55:58,527 INFO util.Utils: Fetching spark://ip-172-31-19-77.us-east-2.compute.internal:34671/jars/hudi-hive-sync-bundle-0.13.0.jar to /mnt/tmp/spark-34f281ee-0421-41e9-a6a1-111b3ce9c37a/userFiles-60aafbbb-5201-4b63-bd4e-15bb4f76e4bc/fetchFileTemp1143665047357098286.tmp | |
2023-05-17 16:55:58,622 INFO executor.Executor: Adding file:/mnt/tmp/spark-34f281ee-0421-41e9-a6a1-111b3ce9c37a/userFiles-60aafbbb-5201-4b63-bd4e-15bb4f76e4bc/hudi-hive-sync-bundle-0.13.0.jar to class loader | |
2023-05-17 16:55:58,622 INFO executor.Executor: Fetching spark://ip-172-31-19-77.us-east-2.compute.internal:34671/jars/hudi-utilities-slim-bundle_2.12-0.13.0.jar with timestamp 1684342557274 | |
2023-05-17 16:55:58,622 INFO util.Utils: Fetching spark://ip-172-31-19-77.us-east-2.compute.internal:34671/jars/hudi-utilities-slim-bundle_2.12-0.13.0.jar to /mnt/tmp/spark-34f281ee-0421-41e9-a6a1-111b3ce9c37a/userFiles-60aafbbb-5201-4b63-bd4e-15bb4f76e4bc/fetchFileTemp4693027231994799213.tmp | |
2023-05-17 16:55:58,722 INFO executor.Executor: Adding file:/mnt/tmp/spark-34f281ee-0421-41e9-a6a1-111b3ce9c37a/userFiles-60aafbbb-5201-4b63-bd4e-15bb4f76e4bc/hudi-utilities-slim-bundle_2.12-0.13.0.jar to class loader | |
2023-05-17 16:55:58,723 INFO executor.Executor: Fetching spark://ip-172-31-19-77.us-east-2.compute.internal:34671/jars/hudi-spark3.2-bundle_2.12-0.13.0.jar with timestamp 1684342557274 | |
2023-05-17 16:55:58,723 INFO util.Utils: Fetching spark://ip-172-31-19-77.us-east-2.compute.internal:34671/jars/hudi-spark3.2-bundle_2.12-0.13.0.jar to /mnt/tmp/spark-34f281ee-0421-41e9-a6a1-111b3ce9c37a/userFiles-60aafbbb-5201-4b63-bd4e-15bb4f76e4bc/fetchFileTemp3043281126992137341.tmp | |
2023-05-17 16:55:58,804 INFO executor.Executor: Adding file:/mnt/tmp/spark-34f281ee-0421-41e9-a6a1-111b3ce9c37a/userFiles-60aafbbb-5201-4b63-bd4e-15bb4f76e4bc/hudi-spark3.2-bundle_2.12-0.13.0.jar to class loader | |
2023-05-17 16:55:58,811 INFO util.Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 38325. | |
2023-05-17 16:55:58,811 INFO netty.NettyBlockTransferService: Server created on ip-172-31-19-77.us-east-2.compute.internal:38325 | |
2023-05-17 16:55:58,813 INFO storage.BlockManager: Using org.apache.spark.storage.RandomBlockReplicationPolicy for block replication policy | |
2023-05-17 16:55:58,820 INFO storage.BlockManagerMaster: Registering BlockManager BlockManagerId(driver, ip-172-31-19-77.us-east-2.compute.internal, 38325, None) | |
2023-05-17 16:55:58,823 INFO storage.BlockManagerMasterEndpoint: Registering block manager ip-172-31-19-77.us-east-2.compute.internal:38325 with 366.3 MiB RAM, BlockManagerId(driver, ip-172-31-19-77.us-east-2.compute.internal, 38325, None) | |
2023-05-17 16:55:58,826 INFO storage.BlockManagerMaster: Registered BlockManager BlockManagerId(driver, ip-172-31-19-77.us-east-2.compute.internal, 38325, None) | |
2023-05-17 16:55:58,827 INFO storage.BlockManager: Initialized BlockManager: BlockManagerId(driver, ip-172-31-19-77.us-east-2.compute.internal, 38325, None) | |
2023-05-17 16:55:58,963 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@153cd6bb{/metrics/json,null,AVAILABLE,@Spark} | |
2023-05-17 16:55:59,359 WARN config.DFSPropertiesConfiguration: Cannot find HUDI_CONF_DIR, please set it as the dir of hudi-defaults.conf | |
2023-05-17 16:55:59,384 INFO utilities.UtilHelpers: Adding overridden properties to file properties. | |
2023-05-17 16:55:59,452 WARN spark.SparkContext: Using an existing SparkContext; some configuration may not take effect. | |
2023-05-17 16:55:59,620 INFO deltastreamer.HoodieDeltaStreamer: Creating delta streamer with configs: | |
hoodie.auto.adjust.lock.configs: true | |
hoodie.cleaner.policy.failed.writes: EAGER | |
hoodie.datasource.hive_sync.auto_create_database: true | |
hoodie.datasource.hive_sync.database: default | |
hoodie.datasource.hive_sync.ignore_exceptions: false | |
hoodie.datasource.hive_sync.jdbcurl: jdbc:hive2://ip-172-31-19-77.us-east-2.compute.internal:10000 | |
hoodie.datasource.hive_sync.mode: hms | |
hoodie.datasource.hive_sync.partition_extractor_class: org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor | |
hoodie.datasource.hive_sync.partition_fields: date | |
hoodie.datasource.hive_sync.skip_ro_suffix: true | |
hoodie.datasource.hive_sync.sync_as_datasource: true | |
hoodie.datasource.hive_sync.sync_comment: true | |
hoodie.datasource.write.hive_style_partitioning: false | |
hoodie.datasource.write.keygenerator.type: SIMPLE | |
hoodie.datasource.write.partitionpath.field: date | |
hoodie.datasource.write.precombine.field: ts | |
hoodie.datasource.write.reconcile.schema: false | |
hoodie.datasource.write.recordkey.field: symbol | |
hoodie.deltastreamer.schemaprovider.source.schema.file: /tmp/schema.avsc | |
hoodie.deltastreamer.schemaprovider.target.schema.file: /tmp/schema.avsc | |
hoodie.deltastreamer.source.dfs.root: /tmp/source_parquet | |
hoodie.index.type: BLOOM | |
hoodie.metadata.enable: true | |
hoodie.write.concurrency.mode: single_writer | |
hoodie.write.lock.provider: org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider | |
hoodie.write.lock.zookeeper.base_path: /hudi | |
hoodie.write.lock.zookeeper.port: 2181 | |
hoodie.write.lock.zookeeper.url: ip-172-31-19-77.us-east-2.compute.internal | |
2023-05-17 16:55:59,628 INFO fs.FSUtils: Resolving file /tmp/schema.avscto be a remote file. | |
2023-05-17 16:55:59,831 INFO factory.HoodieSparkKeyGeneratorFactory: The value of hoodie.datasource.write.keygenerator.type is empty, use SIMPLE | |
2023-05-17 16:55:59,852 INFO table.HoodieTableMetaClient: Initializing /tmp/deltastreamertest/stocks20230517t165531 as hoodie table /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:00,081 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:00,097 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:00,105 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:00,105 INFO table.HoodieTableMetaClient: Finished initializing Table of type COPY_ON_WRITE from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:00,206 INFO helpers.DFSPathSelector: Using path selector org.apache.hudi.utilities.sources.helpers.DFSPathSelector | |
2023-05-17 16:56:00,206 INFO deltastreamer.HoodieDeltaStreamer: Delta Streamer running only single round | |
2023-05-17 16:56:00,208 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:00,212 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:00,219 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:00,240 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
2023-05-17 16:56:00,244 INFO deltastreamer.DeltaSync: Checkpoint to resume from : Optional.empty | |
2023-05-17 16:56:00,251 INFO helpers.DFSPathSelector: Root path => /tmp/source_parquet source limit => 9223372036854775807 | |
2023-05-17 16:56:00,562 INFO memory.MemoryStore: Block broadcast_0 stored as values in memory (estimated size 487.0 KiB, free 365.8 MiB) | |
2023-05-17 16:56:00,878 INFO memory.MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 53.5 KiB, free 365.8 MiB) | |
2023-05-17 16:56:00,880 INFO storage.BlockManagerInfo: Added broadcast_0_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 53.5 KiB, free: 366.2 MiB) | |
2023-05-17 16:56:00,887 INFO spark.SparkContext: Created broadcast 0 from textFile at JsonDFSSource.java:54 | |
2023-05-17 16:56:01,010 ERROR lzo.GPLNativeCodeLoader: Could not load native gpl library | |
java.lang.UnsatisfiedLinkError: no gplcompression in java.library.path | |
at java.lang.ClassLoader.loadLibrary(ClassLoader.java:1860) | |
at java.lang.Runtime.loadLibrary0(Runtime.java:843) | |
at java.lang.System.loadLibrary(System.java:1136) | |
at com.hadoop.compression.lzo.GPLNativeCodeLoader.<clinit>(GPLNativeCodeLoader.java:32) | |
at com.hadoop.compression.lzo.LzoCodec.<clinit>(LzoCodec.java:71) | |
at java.lang.Class.forName0(Native Method) | |
at java.lang.Class.forName(Class.java:348) | |
at org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:2604) | |
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2569) | |
at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:132) | |
at org.apache.hadoop.io.compress.CompressionCodecFactory.<init>(CompressionCodecFactory.java:180) | |
at org.apache.hadoop.mapred.TextInputFormat.configure(TextInputFormat.java:45) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) | |
at java.lang.reflect.Method.invoke(Method.java:498) | |
at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:112) | |
at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:81) | |
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:139) | |
at org.apache.spark.rdd.HadoopRDD.getInputFormat(HadoopRDD.scala:191) | |
at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:205) | |
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:300) | |
at scala.Option.getOrElse(Option.scala:189) | |
at org.apache.spark.rdd.RDD.partitions(RDD.scala:296) | |
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) | |
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:300) | |
at scala.Option.getOrElse(Option.scala:189) | |
at org.apache.spark.rdd.RDD.partitions(RDD.scala:296) | |
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) | |
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:300) | |
at scala.Option.getOrElse(Option.scala:189) | |
at org.apache.spark.rdd.RDD.partitions(RDD.scala:296) | |
at org.apache.spark.rdd.RDD.$anonfun$isEmpty$1(RDD.scala:1557) | |
at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) | |
at org.apache.spark.rdd.RDD.withScope(RDD.scala:414) | |
at org.apache.spark.rdd.RDD.isEmpty(RDD.scala:1557) | |
at org.apache.spark.api.java.JavaRDDLike.isEmpty(JavaRDDLike.scala:545) | |
at org.apache.spark.api.java.JavaRDDLike.isEmpty$(JavaRDDLike.scala:545) | |
at org.apache.spark.api.java.AbstractJavaRDDLike.isEmpty(JavaRDDLike.scala:45) | |
at org.apache.hudi.utilities.deltastreamer.DeltaSync.fetchFromSource(DeltaSync.java:545) | |
at org.apache.hudi.utilities.deltastreamer.DeltaSync.readFromSource(DeltaSync.java:460) | |
at org.apache.hudi.utilities.deltastreamer.DeltaSync.syncOnce(DeltaSync.java:364) | |
at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.lambda$sync$2(HoodieDeltaStreamer.java:215) | |
at org.apache.hudi.common.util.Option.ifPresent(Option.java:97) | |
at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.sync(HoodieDeltaStreamer.java:213) | |
at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.main(HoodieDeltaStreamer.java:592) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) | |
at java.lang.reflect.Method.invoke(Method.java:498) | |
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) | |
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:966) | |
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:191) | |
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:214) | |
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90) | |
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1054) | |
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1063) | |
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) | |
2023-05-17 16:56:01,013 ERROR lzo.LzoCodec: Cannot load native-lzo without native-hadoop | |
2023-05-17 16:56:01,022 INFO mapred.FileInputFormat: Total input files to process : 2 | |
2023-05-17 16:56:01,110 INFO spark.SparkContext: Starting job: isEmpty at DeltaSync.java:545 | |
2023-05-17 16:56:01,132 INFO scheduler.DAGScheduler: Got job 0 (isEmpty at DeltaSync.java:545) with 1 output partitions | |
2023-05-17 16:56:01,133 INFO scheduler.DAGScheduler: Final stage: ResultStage 0 (isEmpty at DeltaSync.java:545) | |
2023-05-17 16:56:01,133 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:56:01,135 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:56:01,142 INFO scheduler.DAGScheduler: Submitting ResultStage 0 (MapPartitionsRDD[2] at map at SourceFormatAdapter.java:67), which has no missing parents | |
2023-05-17 16:56:01,212 INFO memory.MemoryStore: Block broadcast_1 stored as values in memory (estimated size 6.5 KiB, free 365.8 MiB) | |
2023-05-17 16:56:01,215 INFO memory.MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 3.6 KiB, free 365.8 MiB) | |
2023-05-17 16:56:01,215 INFO storage.BlockManagerInfo: Added broadcast_1_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 3.6 KiB, free: 366.2 MiB) | |
2023-05-17 16:56:01,216 INFO spark.SparkContext: Created broadcast 1 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:01,233 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 0 (MapPartitionsRDD[2] at map at SourceFormatAdapter.java:67) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:56:01,233 INFO scheduler.TaskSchedulerImpl: Adding task set 0.0 with 1 tasks resource profile 0 | |
2023-05-17 16:56:01,296 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 0.0 (TID 0) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, ANY, 4555 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:01,323 INFO executor.Executor: Running task 0.0 in stage 0.0 (TID 0) | |
2023-05-17 16:56:01,654 INFO rdd.HadoopRDD: Input split: hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/source_parquet/batch_1.json:0+759994 | |
2023-05-17 16:56:01,734 INFO executor.Executor: Finished task 0.0 in stage 0.0 (TID 0). 1273 bytes result sent to driver | |
2023-05-17 16:56:01,768 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 0.0 (TID 0) in 488 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:56:01,771 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:01,788 INFO scheduler.DAGScheduler: ResultStage 0 (isEmpty at DeltaSync.java:545) finished in 0.618 s | |
2023-05-17 16:56:01,794 INFO scheduler.DAGScheduler: Job 0 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:56:01,795 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 0: Stage finished | |
2023-05-17 16:56:01,797 INFO scheduler.DAGScheduler: Job 0 finished: isEmpty at DeltaSync.java:545, took 0.687355 s | |
2023-05-17 16:56:01,823 INFO deltastreamer.DeltaSync: Setting up new Hoodie Write Client | |
2023-05-17 16:56:01,843 INFO embedded.EmbeddedTimelineService: Starting Timeline service !! | |
2023-05-17 16:56:01,844 INFO embedded.EmbeddedTimelineService: Overriding hostIp to (ip-172-31-19-77.us-east-2.compute.internal) found in spark-conf. It was null | |
2023-05-17 16:56:01,858 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:56:01,859 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:56:01,889 INFO util.log: Logging initialized @6898ms to org.apache.hudi.org.eclipse.jetty.util.log.Slf4jLog | |
2023-05-17 16:56:01,988 INFO storage.BlockManagerInfo: Removed broadcast_1_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 3.6 KiB, free: 366.2 MiB) | |
2023-05-17 16:56:02,140 INFO javalin.Javalin: | |
__ __ _ __ __ | |
/ /____ _ _ __ ____ _ / /(_)____ / // / | |
__ / // __ `/| | / // __ `// // // __ \ / // /_ | |
/ /_/ // /_/ / | |/ // /_/ // // // / / / /__ __/ | |
\____/ \__,_/ |___/ \__,_//_//_//_/ /_/ /_/ | |
https://javalin.io/documentation | |
2023-05-17 16:56:02,143 INFO javalin.Javalin: Starting Javalin ... | |
2023-05-17 16:56:02,150 INFO javalin.Javalin: You are running Javalin 4.6.7 (released October 24, 2022. Your Javalin version is 205 days old. Consider checking for a newer version.). | |
2023-05-17 16:56:02,265 INFO server.Server: jetty-9.4.48.v20220622; built: 2022-06-21T20:42:25.880Z; git: 6b67c5719d1f4371b33655ff2d047d24e171e49a; jvm 1.8.0_372-b07 | |
2023-05-17 16:56:02,399 INFO server.Server: Started @7408ms | |
2023-05-17 16:56:02,399 INFO javalin.Javalin: Listening on http://localhost:35375/ | |
2023-05-17 16:56:02,399 INFO javalin.Javalin: Javalin started in 257ms \o/ | |
2023-05-17 16:56:02,399 INFO service.TimelineService: Starting Timeline server on port :35375 | |
2023-05-17 16:56:02,400 INFO embedded.EmbeddedTimelineService: Started embedded timeline server at ip-172-31-19-77.us-east-2.compute.internal:35375 | |
2023-05-17 16:56:02,427 INFO client.BaseHoodieClient: Timeline Server already running. Not restarting the service | |
2023-05-17 16:56:02,451 INFO client.BaseHoodieClient: Timeline Server already running. Not restarting the service | |
2023-05-17 16:56:02,468 INFO spark.SparkContext: Starting job: isEmpty at DeltaSync.java:665 | |
2023-05-17 16:56:02,470 INFO scheduler.DAGScheduler: Got job 1 (isEmpty at DeltaSync.java:665) with 1 output partitions | |
2023-05-17 16:56:02,470 INFO scheduler.DAGScheduler: Final stage: ResultStage 1 (isEmpty at DeltaSync.java:665) | |
2023-05-17 16:56:02,470 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:56:02,470 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:56:02,471 INFO scheduler.DAGScheduler: Submitting ResultStage 1 (MapPartitionsRDD[3] at map at DeltaSync.java:558), which has no missing parents | |
2023-05-17 16:56:02,477 INFO memory.MemoryStore: Block broadcast_2 stored as values in memory (estimated size 13.0 KiB, free 365.8 MiB) | |
2023-05-17 16:56:02,480 INFO memory.MemoryStore: Block broadcast_2_piece0 stored as bytes in memory (estimated size 6.8 KiB, free 365.8 MiB) | |
2023-05-17 16:56:02,482 INFO storage.BlockManagerInfo: Added broadcast_2_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 6.8 KiB, free: 366.2 MiB) | |
2023-05-17 16:56:02,483 INFO spark.SparkContext: Created broadcast 2 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:02,483 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 1 (MapPartitionsRDD[3] at map at DeltaSync.java:558) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:56:02,484 INFO scheduler.TaskSchedulerImpl: Adding task set 1.0 with 1 tasks resource profile 0 | |
2023-05-17 16:56:02,486 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 1.0 (TID 1) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, ANY, 4555 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:02,487 INFO executor.Executor: Running task 0.0 in stage 1.0 (TID 1) | |
2023-05-17 16:56:02,497 INFO rdd.HadoopRDD: Input split: hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/source_parquet/batch_1.json:0+759994 | |
2023-05-17 16:56:02,532 INFO executor.Executor: Finished task 0.0 in stage 1.0 (TID 1). 1072 bytes result sent to driver | |
2023-05-17 16:56:02,564 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 1.0 (TID 1) in 78 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:56:02,564 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:02,566 INFO scheduler.DAGScheduler: ResultStage 1 (isEmpty at DeltaSync.java:665) finished in 0.093 s | |
2023-05-17 16:56:02,566 INFO scheduler.DAGScheduler: Job 1 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:56:02,566 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 1: Stage finished | |
2023-05-17 16:56:02,567 INFO scheduler.DAGScheduler: Job 1 finished: isEmpty at DeltaSync.java:665, took 0.098493 s | |
2023-05-17 16:56:02,573 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:02,578 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:02,583 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:02,583 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:02,585 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
2023-05-17 16:56:02,592 INFO util.CleanerUtils: Cleaned failed attempts if any | |
2023-05-17 16:56:02,596 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:02,599 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:02,602 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:02,602 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:02,604 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
2023-05-17 16:56:02,612 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:02,621 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:02,624 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:02,627 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:02,631 WARN metadata.HoodieBackedTableMetadata: Metadata table was not found at path /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:02,632 INFO view.FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
2023-05-17 16:56:02,632 INFO view.FileSystemViewManager: Creating remote first table view | |
2023-05-17 16:56:02,648 INFO client.BaseHoodieWriteClient: Generate a new instant time: 20230517165602569 action: commit | |
2023-05-17 16:56:02,655 INFO timeline.HoodieActiveTimeline: Creating a new instant [==>20230517165602569__commit__REQUESTED] | |
2023-05-17 16:56:02,670 INFO deltastreamer.DeltaSync: Starting commit : 20230517165602569 | |
2023-05-17 16:56:02,671 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:02,673 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:02,676 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:02,677 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:02,680 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517165602569__commit__REQUESTED]} | |
2023-05-17 16:56:02,730 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:02,733 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:02,737 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:02,754 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517165602569__commit__REQUESTED]} | |
2023-05-17 16:56:02,755 INFO metadata.HoodieBackedTableMetadataWriter: Creating a new metadata table in /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata at instant 00000000000000 | |
2023-05-17 16:56:02,756 INFO table.HoodieTableMetaClient: Initializing /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata as hoodie table /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:02,815 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:02,820 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:02,825 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:02,825 INFO table.HoodieTableMetaClient: Finished initializing Table of type MERGE_ON_READ from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:02,825 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:02,828 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:02,831 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:02,831 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:02,839 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:02,842 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:02,847 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
2023-05-17 16:56:02,856 INFO view.AbstractTableFileSystemView: Took 3 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:02,882 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:02,884 INFO metadata.HoodieBackedTableMetadataWriter: Creating 1 file groups for partition files with base fileId files- at instant time 00000000000000 | |
2023-05-17 16:56:02,890 INFO log.HoodieLogFormat$WriterBuilder: Building HoodieLogFormat Writer | |
2023-05-17 16:56:02,891 INFO log.HoodieLogFormat$WriterBuilder: HoodieLogFile on path /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0 | |
2023-05-17 16:56:02,896 INFO log.HoodieLogFormatWriter: HoodieLogFile{pathStr='/tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0', fileLen=0} does not exist. Create a new file | |
2023-05-17 16:56:02,943 INFO metadata.HoodieBackedTableMetadataWriter: Initializing metadata table by using file listings in /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:02,943 INFO metadata.HoodieBackedTableMetadataWriter: Triggering empty Commit to metadata to initialize | |
2023-05-17 16:56:02,946 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:02,947 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:02,948 INFO client.BaseHoodieClient: Embedded Timeline Server is disabled. Not starting timeline service | |
2023-05-17 16:56:02,949 INFO client.BaseHoodieClient: Embedded Timeline Server is disabled. Not starting timeline service | |
2023-05-17 16:56:02,949 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:02,951 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:02,954 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:02,954 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:02,955 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
2023-05-17 16:56:02,960 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:56:02,960 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:56:02,961 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:02,963 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:02,965 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:02,965 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:02,966 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
2023-05-17 16:56:02,966 INFO util.CleanerUtils: Cleaned failed attempts if any | |
2023-05-17 16:56:02,967 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:02,968 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:02,970 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:02,971 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:02,972 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
2023-05-17 16:56:02,972 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:56:02,973 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:56:02,974 INFO client.BaseHoodieWriteClient: Generate a new instant time: 00000000000000 action: deltacommit | |
2023-05-17 16:56:02,974 INFO timeline.HoodieActiveTimeline: Creating a new instant [==>00000000000000__deltacommit__REQUESTED] | |
2023-05-17 16:56:02,979 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:02,982 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:02,985 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:02,985 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:02,987 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[==>00000000000000__deltacommit__REQUESTED]} | |
2023-05-17 16:56:02,987 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:56:02,987 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:56:02,994 INFO async.AsyncCleanerService: The HoodieWriteClient is not configured to auto & async clean. Async clean service will not start. | |
2023-05-17 16:56:02,995 INFO async.AsyncArchiveService: The HoodieWriteClient is not configured to auto & async archive. Async archive service will not start. | |
2023-05-17 16:56:03,014 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:03,015 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:03,079 INFO spark.SparkContext: Starting job: countByKey at HoodieJavaPairRDD.java:105 | |
2023-05-17 16:56:03,085 INFO scheduler.DAGScheduler: Job 2 finished: countByKey at HoodieJavaPairRDD.java:105, took 0.005476 s | |
2023-05-17 16:56:03,088 INFO commit.BaseSparkCommitActionExecutor: Input workload profile :WorkloadProfile {globalStat=WorkloadStat {numInserts=0, numUpdates=0}, InputPartitionStat={}, OutputPartitionStat={}, operationType=UPSERT_PREPPED} | |
2023-05-17 16:56:03,090 INFO commit.UpsertPartitioner: AvgRecordSize => 1024 | |
2023-05-17 16:56:03,090 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:03,091 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:03,092 INFO commit.UpsertPartitioner: Total Buckets :0, buckets info => {}, | |
Partition to insert buckets => {}, | |
UpdateLocations mapped to buckets =>{} | |
2023-05-17 16:56:03,106 INFO timeline.HoodieActiveTimeline: Checking for file exists ?/tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/00000000000000.deltacommit.requested | |
2023-05-17 16:56:03,112 INFO util.FileIOUtils: Created a new file in meta path: /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/00000000000000.deltacommit.inflight | |
2023-05-17 16:56:03,132 INFO timeline.HoodieActiveTimeline: Create new file for toInstant ?/tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/00000000000000.deltacommit.inflight | |
2023-05-17 16:56:03,261 INFO commit.BaseSparkCommitActionExecutor: no validators configured. | |
2023-05-17 16:56:03,261 INFO commit.BaseCommitActionExecutor: Auto commit enabled: Committing 00000000000000 | |
2023-05-17 16:56:03,386 INFO spark.SparkContext: Starting job: collect at HoodieJavaRDD.java:163 | |
2023-05-17 16:56:03,388 INFO scheduler.DAGScheduler: Job 3 finished: collect at HoodieJavaRDD.java:163, took 0.001706 s | |
2023-05-17 16:56:03,389 INFO util.CommitUtils: Creating metadata for UPSERT_PREPPED numWriteStats:0 numReplaceFileIds:0 | |
2023-05-17 16:56:03,505 INFO storage.BlockManagerInfo: Removed broadcast_2_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 6.8 KiB, free: 366.2 MiB) | |
2023-05-17 16:56:03,550 INFO spark.SparkContext: Starting job: collect at HoodieJavaRDD.java:163 | |
2023-05-17 16:56:03,551 INFO scheduler.DAGScheduler: Job 4 finished: collect at HoodieJavaRDD.java:163, took 0.000337 s | |
2023-05-17 16:56:03,551 INFO commit.BaseSparkCommitActionExecutor: Committing 00000000000000, action Type deltacommit, operation Type UPSERT_PREPPED | |
2023-05-17 16:56:03,562 INFO timeline.HoodieActiveTimeline: Marking instant complete [==>00000000000000__deltacommit__INFLIGHT] | |
2023-05-17 16:56:03,563 INFO timeline.HoodieActiveTimeline: Checking for file exists ?/tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/00000000000000.deltacommit.inflight | |
2023-05-17 16:56:03,582 INFO timeline.HoodieActiveTimeline: Create new file for toInstant ?/tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/00000000000000.deltacommit | |
2023-05-17 16:56:03,582 INFO timeline.HoodieActiveTimeline: Completed [==>00000000000000__deltacommit__INFLIGHT] | |
2023-05-17 16:56:03,582 INFO commit.BaseSparkCommitActionExecutor: Committed 00000000000000 | |
2023-05-17 16:56:03,584 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:03,585 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:03,588 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:03,588 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:03,589 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:56:03,590 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:56:03,590 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:56:03,694 INFO spark.SparkContext: Starting job: collect at SparkHoodieBackedTableMetadataWriter.java:185 | |
2023-05-17 16:56:03,696 INFO scheduler.DAGScheduler: Job 5 finished: collect at SparkHoodieBackedTableMetadataWriter.java:185, took 0.001198 s | |
2023-05-17 16:56:03,698 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:56:03,741 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:03,743 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:03,745 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:03,745 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:03,747 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:03,748 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:03,749 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:56:03,750 INFO view.AbstractTableFileSystemView: Took 1 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:03,750 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:03,750 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:03,752 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:03,754 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:03,754 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:03,756 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:03,757 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:03,759 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:56:03,759 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:03,760 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:03,760 INFO view.FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
2023-05-17 16:56:03,760 INFO view.FileSystemViewManager: Creating remote first table view | |
2023-05-17 16:56:03,761 INFO async.AsyncCleanerService: The HoodieWriteClient is not configured to auto & async clean. Async clean service will not start. | |
2023-05-17 16:56:03,761 INFO async.AsyncArchiveService: The HoodieWriteClient is not configured to auto & async archive. Async archive service will not start. | |
2023-05-17 16:56:03,808 INFO spark.SparkContext: Starting job: countByKey at HoodieJavaPairRDD.java:105 | |
2023-05-17 16:56:03,812 INFO scheduler.DAGScheduler: Registering RDD 15 (mapToPair at HoodieJavaRDD.java:135) as input to shuffle 3 | |
2023-05-17 16:56:03,816 INFO scheduler.DAGScheduler: Registering RDD 19 (countByKey at HoodieJavaPairRDD.java:105) as input to shuffle 2 | |
2023-05-17 16:56:03,817 INFO scheduler.DAGScheduler: Got job 6 (countByKey at HoodieJavaPairRDD.java:105) with 2 output partitions | |
2023-05-17 16:56:03,817 INFO scheduler.DAGScheduler: Final stage: ResultStage 4 (countByKey at HoodieJavaPairRDD.java:105) | |
2023-05-17 16:56:03,817 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 3) | |
2023-05-17 16:56:03,818 INFO scheduler.DAGScheduler: Missing parents: List(ShuffleMapStage 3) | |
2023-05-17 16:56:03,822 INFO scheduler.DAGScheduler: Submitting ShuffleMapStage 2 (MapPartitionsRDD[15] at mapToPair at HoodieJavaRDD.java:135), which has no missing parents | |
2023-05-17 16:56:03,830 INFO memory.MemoryStore: Block broadcast_3 stored as values in memory (estimated size 31.3 KiB, free 365.7 MiB) | |
2023-05-17 16:56:03,833 INFO memory.MemoryStore: Block broadcast_3_piece0 stored as bytes in memory (estimated size 14.9 KiB, free 365.7 MiB) | |
2023-05-17 16:56:03,833 INFO storage.BlockManagerInfo: Added broadcast_3_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 14.9 KiB, free: 366.2 MiB) | |
2023-05-17 16:56:03,834 INFO spark.SparkContext: Created broadcast 3 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:03,835 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 2 (MapPartitionsRDD[15] at mapToPair at HoodieJavaRDD.java:135) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:56:03,835 INFO scheduler.TaskSchedulerImpl: Adding task set 2.0 with 2 tasks resource profile 0 | |
2023-05-17 16:56:03,838 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 2.0 (TID 2) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, ANY, 4544 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:03,838 INFO executor.Executor: Running task 0.0 in stage 2.0 (TID 2) | |
2023-05-17 16:56:03,897 INFO rdd.HadoopRDD: Input split: hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/source_parquet/batch_1.json:0+759994 | |
2023-05-17 16:56:04,194 INFO executor.Executor: Finished task 0.0 in stage 2.0 (TID 2). 1202 bytes result sent to driver | |
2023-05-17 16:56:04,195 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 2.0 (TID 3) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, ANY, 4544 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:04,197 INFO executor.Executor: Running task 1.0 in stage 2.0 (TID 3) | |
2023-05-17 16:56:04,200 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 2.0 (TID 2) in 363 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:56:04,206 INFO rdd.HadoopRDD: Input split: hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/source_parquet/batch_2.json:0+363815 | |
2023-05-17 16:56:04,296 INFO executor.Executor: Finished task 1.0 in stage 2.0 (TID 3). 1202 bytes result sent to driver | |
2023-05-17 16:56:04,298 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 2.0 (TID 3) in 103 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:56:04,298 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 2.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:04,299 INFO scheduler.DAGScheduler: ShuffleMapStage 2 (mapToPair at HoodieJavaRDD.java:135) finished in 0.472 s | |
2023-05-17 16:56:04,300 INFO scheduler.DAGScheduler: looking for newly runnable stages | |
2023-05-17 16:56:04,300 INFO scheduler.DAGScheduler: running: Set() | |
2023-05-17 16:56:04,301 INFO scheduler.DAGScheduler: waiting: Set(ShuffleMapStage 3, ResultStage 4) | |
2023-05-17 16:56:04,301 INFO scheduler.DAGScheduler: failed: Set() | |
2023-05-17 16:56:04,307 INFO scheduler.DAGScheduler: Submitting ShuffleMapStage 3 (MapPartitionsRDD[19] at countByKey at HoodieJavaPairRDD.java:105), which has no missing parents | |
2023-05-17 16:56:04,316 INFO memory.MemoryStore: Block broadcast_4 stored as values in memory (estimated size 25.5 KiB, free 365.7 MiB) | |
2023-05-17 16:56:04,320 INFO memory.MemoryStore: Block broadcast_4_piece0 stored as bytes in memory (estimated size 12.2 KiB, free 365.7 MiB) | |
2023-05-17 16:56:04,321 INFO storage.BlockManagerInfo: Added broadcast_4_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 12.2 KiB, free: 366.2 MiB) | |
2023-05-17 16:56:04,323 INFO spark.SparkContext: Created broadcast 4 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:04,324 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 3 (MapPartitionsRDD[19] at countByKey at HoodieJavaPairRDD.java:105) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:56:04,324 INFO scheduler.TaskSchedulerImpl: Adding task set 3.0 with 2 tasks resource profile 0 | |
2023-05-17 16:56:04,328 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 3.0 (TID 4) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, NODE_LOCAL, 4260 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:04,328 INFO executor.Executor: Running task 0.0 in stage 3.0 (TID 4) | |
2023-05-17 16:56:04,368 INFO storage.ShuffleBlockFetcherIterator: Getting 2 (7.1 KiB) non-empty blocks including 2 (7.1 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:56:04,371 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 13 ms | |
2023-05-17 16:56:04,412 INFO memory.MemoryStore: Block rdd_17_0 stored as values in memory (estimated size 8.2 KiB, free 365.7 MiB) | |
2023-05-17 16:56:04,413 INFO storage.BlockManagerInfo: Added rdd_17_0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 8.2 KiB, free: 366.2 MiB) | |
2023-05-17 16:56:04,422 INFO executor.Executor: Finished task 0.0 in stage 3.0 (TID 4). 1417 bytes result sent to driver | |
2023-05-17 16:56:04,423 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 3.0 (TID 5) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, NODE_LOCAL, 4260 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:04,424 INFO executor.Executor: Running task 1.0 in stage 3.0 (TID 5) | |
2023-05-17 16:56:04,424 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 3.0 (TID 4) in 98 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:56:04,428 INFO storage.ShuffleBlockFetcherIterator: Getting 2 (4.8 KiB) non-empty blocks including 2 (4.8 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:56:04,428 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:56:04,443 INFO memory.MemoryStore: Block rdd_17_1 stored as values in memory (estimated size 5.8 KiB, free 365.7 MiB) | |
2023-05-17 16:56:04,444 INFO storage.BlockManagerInfo: Added rdd_17_1 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 5.8 KiB, free: 366.2 MiB) | |
2023-05-17 16:56:04,448 INFO executor.Executor: Finished task 1.0 in stage 3.0 (TID 5). 1417 bytes result sent to driver | |
2023-05-17 16:56:04,451 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 3.0 (TID 5) in 27 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:56:04,451 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 3.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:04,451 INFO scheduler.DAGScheduler: ShuffleMapStage 3 (countByKey at HoodieJavaPairRDD.java:105) finished in 0.139 s | |
2023-05-17 16:56:04,452 INFO scheduler.DAGScheduler: looking for newly runnable stages | |
2023-05-17 16:56:04,452 INFO scheduler.DAGScheduler: running: Set() | |
2023-05-17 16:56:04,452 INFO scheduler.DAGScheduler: waiting: Set(ResultStage 4) | |
2023-05-17 16:56:04,452 INFO scheduler.DAGScheduler: failed: Set() | |
2023-05-17 16:56:04,452 INFO scheduler.DAGScheduler: Submitting ResultStage 4 (ShuffledRDD[20] at countByKey at HoodieJavaPairRDD.java:105), which has no missing parents | |
2023-05-17 16:56:04,455 INFO memory.MemoryStore: Block broadcast_5 stored as values in memory (estimated size 5.5 KiB, free 365.7 MiB) | |
2023-05-17 16:56:04,457 INFO memory.MemoryStore: Block broadcast_5_piece0 stored as bytes in memory (estimated size 3.2 KiB, free 365.7 MiB) | |
2023-05-17 16:56:04,457 INFO storage.BlockManagerInfo: Added broadcast_5_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 3.2 KiB, free: 366.2 MiB) | |
2023-05-17 16:56:04,458 INFO spark.SparkContext: Created broadcast 5 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:04,458 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ResultStage 4 (ShuffledRDD[20] at countByKey at HoodieJavaPairRDD.java:105) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:56:04,458 INFO scheduler.TaskSchedulerImpl: Adding task set 4.0 with 2 tasks resource profile 0 | |
2023-05-17 16:56:04,460 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 4.0 (TID 6) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, NODE_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:04,461 INFO executor.Executor: Running task 1.0 in stage 4.0 (TID 6) | |
2023-05-17 16:56:04,464 INFO storage.ShuffleBlockFetcherIterator: Getting 2 (120.0 B) non-empty blocks including 2 (120.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:56:04,464 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:56:04,471 INFO executor.Executor: Finished task 1.0 in stage 4.0 (TID 6). 1244 bytes result sent to driver | |
2023-05-17 16:56:04,473 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 4.0 (TID 7) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:04,473 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 4.0 (TID 6) in 13 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:56:04,474 INFO executor.Executor: Running task 0.0 in stage 4.0 (TID 7) | |
2023-05-17 16:56:04,482 INFO storage.ShuffleBlockFetcherIterator: Getting 0 (0.0 B) non-empty blocks including 0 (0.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:56:04,482 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:56:04,484 INFO executor.Executor: Finished task 0.0 in stage 4.0 (TID 7). 1227 bytes result sent to driver | |
2023-05-17 16:56:04,485 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 4.0 (TID 7) in 13 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:56:04,485 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 4.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:04,487 INFO scheduler.DAGScheduler: ResultStage 4 (countByKey at HoodieJavaPairRDD.java:105) finished in 0.033 s | |
2023-05-17 16:56:04,487 INFO scheduler.DAGScheduler: Job 6 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:56:04,488 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 4: Stage finished | |
2023-05-17 16:56:04,488 INFO scheduler.DAGScheduler: Job 6 finished: countByKey at HoodieJavaPairRDD.java:105, took 0.679601 s | |
2023-05-17 16:56:04,663 INFO spark.SparkContext: Starting job: collect at HoodieSparkEngineContext.java:137 | |
2023-05-17 16:56:04,664 INFO scheduler.DAGScheduler: Got job 7 (collect at HoodieSparkEngineContext.java:137) with 1 output partitions | |
2023-05-17 16:56:04,664 INFO scheduler.DAGScheduler: Final stage: ResultStage 5 (collect at HoodieSparkEngineContext.java:137) | |
2023-05-17 16:56:04,664 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:56:04,665 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:56:04,666 INFO scheduler.DAGScheduler: Submitting ResultStage 5 (MapPartitionsRDD[22] at flatMap at HoodieSparkEngineContext.java:137), which has no missing parents | |
2023-05-17 16:56:04,708 INFO memory.MemoryStore: Block broadcast_6 stored as values in memory (estimated size 638.8 KiB, free 365.0 MiB) | |
2023-05-17 16:56:04,711 INFO memory.MemoryStore: Block broadcast_6_piece0 stored as bytes in memory (estimated size 238.7 KiB, free 364.8 MiB) | |
2023-05-17 16:56:04,712 INFO storage.BlockManagerInfo: Added broadcast_6_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 238.7 KiB, free: 366.0 MiB) | |
2023-05-17 16:56:04,712 INFO spark.SparkContext: Created broadcast 6 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:04,713 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 5 (MapPartitionsRDD[22] at flatMap at HoodieSparkEngineContext.java:137) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:56:04,713 INFO scheduler.TaskSchedulerImpl: Adding task set 5.0 with 1 tasks resource profile 0 | |
2023-05-17 16:56:04,716 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 5.0 (TID 8) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4344 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:04,717 INFO executor.Executor: Running task 0.0 in stage 5.0 (TID 8) | |
2023-05-17 16:56:04,795 INFO executor.Executor: Finished task 0.0 in stage 5.0 (TID 8). 797 bytes result sent to driver | |
2023-05-17 16:56:04,796 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 5.0 (TID 8) in 82 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:56:04,796 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 5.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:04,797 INFO scheduler.DAGScheduler: ResultStage 5 (collect at HoodieSparkEngineContext.java:137) finished in 0.129 s | |
2023-05-17 16:56:04,797 INFO scheduler.DAGScheduler: Job 7 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:56:04,797 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 5: Stage finished | |
2023-05-17 16:56:04,798 INFO scheduler.DAGScheduler: Job 7 finished: collect at HoodieSparkEngineContext.java:137, took 0.134084 s | |
2023-05-17 16:56:04,993 INFO spark.SparkContext: Starting job: collect at HoodieSparkEngineContext.java:103 | |
2023-05-17 16:56:04,994 INFO scheduler.DAGScheduler: Got job 8 (collect at HoodieSparkEngineContext.java:103) with 1 output partitions | |
2023-05-17 16:56:04,995 INFO scheduler.DAGScheduler: Final stage: ResultStage 6 (collect at HoodieSparkEngineContext.java:103) | |
2023-05-17 16:56:04,995 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:56:04,995 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:56:04,998 INFO scheduler.DAGScheduler: Submitting ResultStage 6 (MapPartitionsRDD[24] at map at HoodieSparkEngineContext.java:103), which has no missing parents | |
2023-05-17 16:56:05,050 INFO memory.MemoryStore: Block broadcast_7 stored as values in memory (estimated size 638.6 KiB, free 364.2 MiB) | |
2023-05-17 16:56:05,054 INFO memory.MemoryStore: Block broadcast_7_piece0 stored as bytes in memory (estimated size 238.6 KiB, free 364.0 MiB) | |
2023-05-17 16:56:05,055 INFO storage.BlockManagerInfo: Added broadcast_7_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 238.6 KiB, free: 365.7 MiB) | |
2023-05-17 16:56:05,056 INFO spark.SparkContext: Created broadcast 7 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:05,056 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 6 (MapPartitionsRDD[24] at map at HoodieSparkEngineContext.java:103) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:56:05,056 INFO scheduler.TaskSchedulerImpl: Adding task set 6.0 with 1 tasks resource profile 0 | |
2023-05-17 16:56:05,059 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 6.0 (TID 9) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4332 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:05,059 INFO executor.Executor: Running task 0.0 in stage 6.0 (TID 9) | |
2023-05-17 16:56:05,152 INFO storage.BlockManagerInfo: Removed broadcast_3_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 14.9 KiB, free: 365.8 MiB) | |
2023-05-17 16:56:05,166 INFO executor.Executor: Finished task 0.0 in stage 6.0 (TID 9). 840 bytes result sent to driver | |
2023-05-17 16:56:05,167 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 6.0 (TID 9) in 108 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:56:05,167 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 6.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:05,168 INFO scheduler.DAGScheduler: ResultStage 6 (collect at HoodieSparkEngineContext.java:103) finished in 0.167 s | |
2023-05-17 16:56:05,169 INFO scheduler.DAGScheduler: Job 8 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:56:05,169 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 6: Stage finished | |
2023-05-17 16:56:05,169 INFO scheduler.DAGScheduler: Job 8 finished: collect at HoodieSparkEngineContext.java:103, took 0.175821 s | |
2023-05-17 16:56:05,180 INFO bloom.SparkHoodieBloomIndexHelper: Input parallelism: 2, Index parallelism: 2 | |
2023-05-17 16:56:05,200 INFO spark.SparkContext: Starting job: countByKey at SparkHoodieBloomIndexHelper.java:195 | |
2023-05-17 16:56:05,203 INFO scheduler.DAGScheduler: Registering RDD 27 (countByKey at SparkHoodieBloomIndexHelper.java:195) as input to shuffle 4 | |
2023-05-17 16:56:05,203 INFO scheduler.DAGScheduler: Got job 9 (countByKey at SparkHoodieBloomIndexHelper.java:195) with 2 output partitions | |
2023-05-17 16:56:05,203 INFO scheduler.DAGScheduler: Final stage: ResultStage 9 (countByKey at SparkHoodieBloomIndexHelper.java:195) | |
2023-05-17 16:56:05,203 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 8) | |
2023-05-17 16:56:05,203 INFO scheduler.DAGScheduler: Missing parents: List(ShuffleMapStage 8) | |
2023-05-17 16:56:05,214 INFO storage.BlockManager: Removing RDD 4 | |
2023-05-17 16:56:05,215 INFO scheduler.DAGScheduler: Submitting ShuffleMapStage 8 (MapPartitionsRDD[27] at countByKey at SparkHoodieBloomIndexHelper.java:195), which has no missing parents | |
2023-05-17 16:56:05,224 INFO memory.MemoryStore: Block broadcast_8 stored as values in memory (estimated size 27.1 KiB, free 364.0 MiB) | |
2023-05-17 16:56:05,225 INFO memory.MemoryStore: Block broadcast_8_piece0 stored as bytes in memory (estimated size 12.8 KiB, free 364.0 MiB) | |
2023-05-17 16:56:05,225 INFO storage.BlockManagerInfo: Added broadcast_8_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 12.8 KiB, free: 365.7 MiB) | |
2023-05-17 16:56:05,226 INFO spark.SparkContext: Created broadcast 8 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:05,227 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 8 (MapPartitionsRDD[27] at countByKey at SparkHoodieBloomIndexHelper.java:195) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:56:05,227 INFO scheduler.TaskSchedulerImpl: Adding task set 8.0 with 2 tasks resource profile 0 | |
2023-05-17 16:56:05,228 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 8.0 (TID 10) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4260 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:05,229 INFO executor.Executor: Running task 0.0 in stage 8.0 (TID 10) | |
2023-05-17 16:56:05,233 INFO storage.BlockManager: Found block rdd_17_0 locally | |
2023-05-17 16:56:05,242 INFO executor.Executor: Finished task 0.0 in stage 8.0 (TID 10). 1116 bytes result sent to driver | |
2023-05-17 16:56:05,243 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 8.0 (TID 11) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, PROCESS_LOCAL, 4260 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:05,244 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 8.0 (TID 10) in 16 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:56:05,245 INFO executor.Executor: Running task 1.0 in stage 8.0 (TID 11) | |
2023-05-17 16:56:05,252 INFO storage.BlockManager: Found block rdd_17_1 locally | |
2023-05-17 16:56:05,255 INFO executor.Executor: Finished task 1.0 in stage 8.0 (TID 11). 1116 bytes result sent to driver | |
2023-05-17 16:56:05,256 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 8.0 (TID 11) in 13 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:56:05,257 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 8.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:05,257 INFO scheduler.DAGScheduler: ShuffleMapStage 8 (countByKey at SparkHoodieBloomIndexHelper.java:195) finished in 0.040 s | |
2023-05-17 16:56:05,258 INFO scheduler.DAGScheduler: looking for newly runnable stages | |
2023-05-17 16:56:05,258 INFO scheduler.DAGScheduler: running: Set() | |
2023-05-17 16:56:05,258 INFO scheduler.DAGScheduler: waiting: Set(ResultStage 9) | |
2023-05-17 16:56:05,258 INFO scheduler.DAGScheduler: failed: Set() | |
2023-05-17 16:56:05,259 INFO scheduler.DAGScheduler: Submitting ResultStage 9 (ShuffledRDD[28] at countByKey at SparkHoodieBloomIndexHelper.java:195), which has no missing parents | |
2023-05-17 16:56:05,261 INFO memory.MemoryStore: Block broadcast_9 stored as values in memory (estimated size 5.5 KiB, free 364.0 MiB) | |
2023-05-17 16:56:05,263 INFO memory.MemoryStore: Block broadcast_9_piece0 stored as bytes in memory (estimated size 3.2 KiB, free 364.0 MiB) | |
2023-05-17 16:56:05,268 INFO storage.BlockManagerInfo: Added broadcast_9_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 3.2 KiB, free: 365.7 MiB) | |
2023-05-17 16:56:05,269 INFO storage.BlockManagerInfo: Removed broadcast_6_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 238.7 KiB, free: 366.0 MiB) | |
2023-05-17 16:56:05,270 INFO spark.SparkContext: Created broadcast 9 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:05,270 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ResultStage 9 (ShuffledRDD[28] at countByKey at SparkHoodieBloomIndexHelper.java:195) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:56:05,270 INFO scheduler.TaskSchedulerImpl: Adding task set 9.0 with 2 tasks resource profile 0 | |
2023-05-17 16:56:05,271 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 9.0 (TID 12) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:05,271 INFO executor.Executor: Running task 0.0 in stage 9.0 (TID 12) | |
2023-05-17 16:56:05,273 INFO storage.ShuffleBlockFetcherIterator: Getting 0 (0.0 B) non-empty blocks including 0 (0.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:56:05,273 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:56:05,274 INFO executor.Executor: Finished task 0.0 in stage 9.0 (TID 12). 1227 bytes result sent to driver | |
2023-05-17 16:56:05,275 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 9.0 (TID 13) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:05,275 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 9.0 (TID 12) in 4 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:56:05,276 INFO executor.Executor: Running task 1.0 in stage 9.0 (TID 13) | |
2023-05-17 16:56:05,278 INFO storage.ShuffleBlockFetcherIterator: Getting 0 (0.0 B) non-empty blocks including 0 (0.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:56:05,278 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:56:05,279 INFO executor.Executor: Finished task 1.0 in stage 9.0 (TID 13). 1227 bytes result sent to driver | |
2023-05-17 16:56:05,280 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 9.0 (TID 13) in 5 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:56:05,280 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 9.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:05,281 INFO scheduler.DAGScheduler: ResultStage 9 (countByKey at SparkHoodieBloomIndexHelper.java:195) finished in 0.021 s | |
2023-05-17 16:56:05,281 INFO scheduler.DAGScheduler: Job 9 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:56:05,282 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 9: Stage finished | |
2023-05-17 16:56:05,282 INFO scheduler.DAGScheduler: Job 9 finished: countByKey at SparkHoodieBloomIndexHelper.java:195, took 0.081229 s | |
2023-05-17 16:56:05,283 INFO bloom.BucketizedBloomCheckPartitioner: TotalBuckets 0, min_buckets/partition 1 | |
2023-05-17 16:56:05,301 INFO storage.BlockManagerInfo: Removed broadcast_4_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 12.2 KiB, free: 366.0 MiB) | |
2023-05-17 16:56:05,313 INFO storage.BlockManager: Removing RDD 12 | |
2023-05-17 16:56:05,323 INFO storage.BlockManagerInfo: Removed broadcast_5_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 3.2 KiB, free: 366.0 MiB) | |
2023-05-17 16:56:05,375 INFO rdd.MapPartitionsRDD: Removing RDD 17 from persistence list | |
2023-05-17 16:56:05,379 INFO storage.BlockManager: Removing RDD 17 | |
2023-05-17 16:56:05,379 INFO rdd.MapPartitionsRDD: Removing RDD 35 from persistence list | |
2023-05-17 16:56:05,381 INFO storage.BlockManager: Removing RDD 35 | |
2023-05-17 16:56:05,382 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:05,384 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:05,403 INFO spark.SparkContext: Starting job: countByKey at HoodieJavaPairRDD.java:105 | |
2023-05-17 16:56:05,406 INFO scheduler.DAGScheduler: Registering RDD 29 (mapToPair at SparkHoodieBloomIndexHelper.java:164) as input to shuffle 8 | |
2023-05-17 16:56:05,407 INFO scheduler.DAGScheduler: Registering RDD 36 (mapToPair at HoodieJavaRDD.java:135) as input to shuffle 5 | |
2023-05-17 16:56:05,407 INFO scheduler.DAGScheduler: Registering RDD 35 (flatMapToPair at SparkHoodieBloomIndexHelper.java:175) as input to shuffle 6 | |
2023-05-17 16:56:05,408 INFO scheduler.DAGScheduler: Registering RDD 45 (countByKey at HoodieJavaPairRDD.java:105) as input to shuffle 7 | |
2023-05-17 16:56:05,408 INFO scheduler.DAGScheduler: Got job 10 (countByKey at HoodieJavaPairRDD.java:105) with 2 output partitions | |
2023-05-17 16:56:05,408 INFO scheduler.DAGScheduler: Final stage: ResultStage 15 (countByKey at HoodieJavaPairRDD.java:105) | |
2023-05-17 16:56:05,408 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 14) | |
2023-05-17 16:56:05,409 INFO scheduler.DAGScheduler: Missing parents: List(ShuffleMapStage 14) | |
2023-05-17 16:56:05,414 INFO scheduler.DAGScheduler: Submitting ShuffleMapStage 12 (MapPartitionsRDD[36] at mapToPair at HoodieJavaRDD.java:135), which has no missing parents | |
2023-05-17 16:56:05,419 INFO memory.MemoryStore: Block broadcast_10 stored as values in memory (estimated size 25.2 KiB, free 364.8 MiB) | |
2023-05-17 16:56:05,421 INFO memory.MemoryStore: Block broadcast_10_piece0 stored as bytes in memory (estimated size 12.1 KiB, free 364.8 MiB) | |
2023-05-17 16:56:05,422 INFO storage.BlockManagerInfo: Added broadcast_10_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 12.1 KiB, free: 366.0 MiB) | |
2023-05-17 16:56:05,422 INFO spark.SparkContext: Created broadcast 10 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:05,423 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 12 (MapPartitionsRDD[36] at mapToPair at HoodieJavaRDD.java:135) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:56:05,423 INFO scheduler.TaskSchedulerImpl: Adding task set 12.0 with 2 tasks resource profile 0 | |
2023-05-17 16:56:05,424 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 12.0 (TID 14) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, NODE_LOCAL, 4260 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:05,425 INFO executor.Executor: Running task 0.0 in stage 12.0 (TID 14) | |
2023-05-17 16:56:05,429 INFO storage.ShuffleBlockFetcherIterator: Getting 2 (7.1 KiB) non-empty blocks including 2 (7.1 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:56:05,429 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:56:05,465 INFO executor.Executor: Finished task 0.0 in stage 12.0 (TID 14). 1417 bytes result sent to driver | |
2023-05-17 16:56:05,467 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 12.0 (TID 15) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, NODE_LOCAL, 4260 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:05,467 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 12.0 (TID 14) in 43 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:56:05,467 INFO executor.Executor: Running task 1.0 in stage 12.0 (TID 15) | |
2023-05-17 16:56:05,472 INFO storage.ShuffleBlockFetcherIterator: Getting 2 (4.8 KiB) non-empty blocks including 2 (4.8 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:56:05,472 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:56:05,495 INFO executor.Executor: Finished task 1.0 in stage 12.0 (TID 15). 1417 bytes result sent to driver | |
2023-05-17 16:56:05,498 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 12.0 (TID 15) in 32 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:56:05,498 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 12.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:05,499 INFO scheduler.DAGScheduler: ShuffleMapStage 12 (mapToPair at HoodieJavaRDD.java:135) finished in 0.082 s | |
2023-05-17 16:56:05,499 INFO scheduler.DAGScheduler: looking for newly runnable stages | |
2023-05-17 16:56:05,499 INFO scheduler.DAGScheduler: running: Set() | |
2023-05-17 16:56:05,499 INFO scheduler.DAGScheduler: waiting: Set(ResultStage 15, ShuffleMapStage 14) | |
2023-05-17 16:56:05,499 INFO scheduler.DAGScheduler: failed: Set() | |
2023-05-17 16:56:05,500 INFO scheduler.DAGScheduler: Submitting ShuffleMapStage 14 (MapPartitionsRDD[45] at countByKey at HoodieJavaPairRDD.java:105), which has no missing parents | |
2023-05-17 16:56:05,511 INFO memory.MemoryStore: Block broadcast_11 stored as values in memory (estimated size 10.0 KiB, free 364.8 MiB) | |
2023-05-17 16:56:05,513 INFO memory.MemoryStore: Block broadcast_11_piece0 stored as bytes in memory (estimated size 5.2 KiB, free 364.8 MiB) | |
2023-05-17 16:56:05,513 INFO storage.BlockManagerInfo: Added broadcast_11_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 5.2 KiB, free: 366.0 MiB) | |
2023-05-17 16:56:05,515 INFO spark.SparkContext: Created broadcast 11 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:05,515 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 14 (MapPartitionsRDD[45] at countByKey at HoodieJavaPairRDD.java:105) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:56:05,515 INFO scheduler.TaskSchedulerImpl: Adding task set 14.0 with 2 tasks resource profile 0 | |
2023-05-17 16:56:05,517 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 14.0 (TID 16) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4323 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:05,518 INFO executor.Executor: Running task 0.0 in stage 14.0 (TID 16) | |
2023-05-17 16:56:05,530 INFO storage.ShuffleBlockFetcherIterator: Getting 1 (3.5 KiB) non-empty blocks including 1 (3.5 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:56:05,530 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:56:05,531 INFO storage.ShuffleBlockFetcherIterator: Getting 0 (0.0 B) non-empty blocks including 0 (0.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:56:05,531 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:56:05,556 INFO memory.MemoryStore: Block rdd_43_0 stored as values in memory (estimated size 8.2 KiB, free 364.8 MiB) | |
2023-05-17 16:56:05,557 INFO storage.BlockManagerInfo: Added rdd_43_0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 8.2 KiB, free: 366.0 MiB) | |
2023-05-17 16:56:05,561 INFO executor.Executor: Finished task 0.0 in stage 14.0 (TID 16). 1417 bytes result sent to driver | |
2023-05-17 16:56:05,562 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 14.0 (TID 17) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, PROCESS_LOCAL, 4323 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:05,562 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 14.0 (TID 16) in 46 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:56:05,562 INFO executor.Executor: Running task 1.0 in stage 14.0 (TID 17) | |
2023-05-17 16:56:05,568 INFO storage.ShuffleBlockFetcherIterator: Getting 1 (2.4 KiB) non-empty blocks including 1 (2.4 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:56:05,569 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:56:05,570 INFO storage.ShuffleBlockFetcherIterator: Getting 0 (0.0 B) non-empty blocks including 0 (0.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:56:05,570 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:56:05,583 INFO memory.MemoryStore: Block rdd_43_1 stored as values in memory (estimated size 5.8 KiB, free 364.8 MiB) | |
2023-05-17 16:56:05,584 INFO storage.BlockManagerInfo: Added rdd_43_1 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 5.8 KiB, free: 366.0 MiB) | |
2023-05-17 16:56:05,592 INFO executor.Executor: Finished task 1.0 in stage 14.0 (TID 17). 1417 bytes result sent to driver | |
2023-05-17 16:56:05,593 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 14.0 (TID 17) in 32 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:56:05,593 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 14.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:05,594 INFO scheduler.DAGScheduler: ShuffleMapStage 14 (countByKey at HoodieJavaPairRDD.java:105) finished in 0.092 s | |
2023-05-17 16:56:05,594 INFO scheduler.DAGScheduler: looking for newly runnable stages | |
2023-05-17 16:56:05,594 INFO scheduler.DAGScheduler: running: Set() | |
2023-05-17 16:56:05,595 INFO scheduler.DAGScheduler: waiting: Set(ResultStage 15) | |
2023-05-17 16:56:05,595 INFO scheduler.DAGScheduler: failed: Set() | |
2023-05-17 16:56:05,595 INFO scheduler.DAGScheduler: Submitting ResultStage 15 (ShuffledRDD[46] at countByKey at HoodieJavaPairRDD.java:105), which has no missing parents | |
2023-05-17 16:56:05,597 INFO memory.MemoryStore: Block broadcast_12 stored as values in memory (estimated size 5.5 KiB, free 364.8 MiB) | |
2023-05-17 16:56:05,599 INFO memory.MemoryStore: Block broadcast_12_piece0 stored as bytes in memory (estimated size 3.2 KiB, free 364.8 MiB) | |
2023-05-17 16:56:05,599 INFO storage.BlockManagerInfo: Added broadcast_12_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 3.2 KiB, free: 366.0 MiB) | |
2023-05-17 16:56:05,600 INFO spark.SparkContext: Created broadcast 12 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:05,601 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ResultStage 15 (ShuffledRDD[46] at countByKey at HoodieJavaPairRDD.java:105) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:56:05,601 INFO scheduler.TaskSchedulerImpl: Adding task set 15.0 with 2 tasks resource profile 0 | |
2023-05-17 16:56:05,602 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 15.0 (TID 18) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, NODE_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:05,602 INFO executor.Executor: Running task 1.0 in stage 15.0 (TID 18) | |
2023-05-17 16:56:05,607 INFO storage.ShuffleBlockFetcherIterator: Getting 2 (194.0 B) non-empty blocks including 2 (194.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:56:05,607 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 2 ms | |
2023-05-17 16:56:05,616 INFO executor.Executor: Finished task 1.0 in stage 15.0 (TID 18). 1284 bytes result sent to driver | |
2023-05-17 16:56:05,617 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 15.0 (TID 19) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:05,617 INFO executor.Executor: Running task 0.0 in stage 15.0 (TID 19) | |
2023-05-17 16:56:05,618 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 15.0 (TID 18) in 16 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:56:05,621 INFO storage.ShuffleBlockFetcherIterator: Getting 0 (0.0 B) non-empty blocks including 0 (0.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:56:05,621 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:56:05,622 INFO executor.Executor: Finished task 0.0 in stage 15.0 (TID 19). 1227 bytes result sent to driver | |
2023-05-17 16:56:05,624 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 15.0 (TID 19) in 7 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:56:05,624 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 15.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:05,625 INFO scheduler.DAGScheduler: ResultStage 15 (countByKey at HoodieJavaPairRDD.java:105) finished in 0.029 s | |
2023-05-17 16:56:05,626 INFO scheduler.DAGScheduler: Job 10 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:56:05,626 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 15: Stage finished | |
2023-05-17 16:56:05,626 INFO scheduler.DAGScheduler: Job 10 finished: countByKey at HoodieJavaPairRDD.java:105, took 0.223326 s | |
2023-05-17 16:56:05,628 INFO commit.BaseSparkCommitActionExecutor: Input workload profile :WorkloadProfile {globalStat=WorkloadStat {numInserts=99, numUpdates=0}, InputPartitionStat={2018/08/31=WorkloadStat {numInserts=99, numUpdates=0}}, OutputPartitionStat={}, operationType=UPSERT} | |
2023-05-17 16:56:05,628 INFO commit.UpsertPartitioner: AvgRecordSize => 1024 | |
2023-05-17 16:56:05,771 INFO spark.SparkContext: Starting job: collectAsMap at UpsertPartitioner.java:279 | |
2023-05-17 16:56:05,771 INFO scheduler.DAGScheduler: Got job 11 (collectAsMap at UpsertPartitioner.java:279) with 1 output partitions | |
2023-05-17 16:56:05,771 INFO scheduler.DAGScheduler: Final stage: ResultStage 16 (collectAsMap at UpsertPartitioner.java:279) | |
2023-05-17 16:56:05,771 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:56:05,771 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:56:05,772 INFO scheduler.DAGScheduler: Submitting ResultStage 16 (MapPartitionsRDD[48] at mapToPair at UpsertPartitioner.java:278), which has no missing parents | |
2023-05-17 16:56:05,813 INFO memory.MemoryStore: Block broadcast_13 stored as values in memory (estimated size 639.6 KiB, free 364.2 MiB) | |
2023-05-17 16:56:05,817 INFO memory.MemoryStore: Block broadcast_13_piece0 stored as bytes in memory (estimated size 239.3 KiB, free 363.9 MiB) | |
2023-05-17 16:56:05,817 INFO storage.BlockManagerInfo: Added broadcast_13_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 239.3 KiB, free: 365.7 MiB) | |
2023-05-17 16:56:05,818 INFO spark.SparkContext: Created broadcast 13 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:05,818 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 16 (MapPartitionsRDD[48] at mapToPair at UpsertPartitioner.java:278) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:56:05,818 INFO scheduler.TaskSchedulerImpl: Adding task set 16.0 with 1 tasks resource profile 0 | |
2023-05-17 16:56:05,820 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 16.0 (TID 20) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4344 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:05,820 INFO executor.Executor: Running task 0.0 in stage 16.0 (TID 20) | |
2023-05-17 16:56:05,861 INFO executor.Executor: Finished task 0.0 in stage 16.0 (TID 20). 834 bytes result sent to driver | |
2023-05-17 16:56:05,864 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 16.0 (TID 20) in 45 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:56:05,864 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 16.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:05,865 INFO scheduler.DAGScheduler: ResultStage 16 (collectAsMap at UpsertPartitioner.java:279) finished in 0.091 s | |
2023-05-17 16:56:05,865 INFO scheduler.DAGScheduler: Job 11 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:56:05,865 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 16: Stage finished | |
2023-05-17 16:56:05,865 INFO scheduler.DAGScheduler: Job 11 finished: collectAsMap at UpsertPartitioner.java:279, took 0.094576 s | |
2023-05-17 16:56:05,867 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:05,868 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:05,869 INFO commit.UpsertPartitioner: For partitionPath : 2018/08/31 Small Files => [] | |
2023-05-17 16:56:05,869 INFO commit.UpsertPartitioner: After small file assignment: unassignedInserts => 99, totalInsertBuckets => 1, recordsPerBucket => 122880 | |
2023-05-17 16:56:05,873 INFO commit.UpsertPartitioner: Total insert buckets for partition path 2018/08/31 => [(InsertBucket {bucketNumber=0, weight=1.0},1.0)] | |
2023-05-17 16:56:05,873 INFO commit.UpsertPartitioner: Total Buckets :1, buckets info => {0=BucketInfo {bucketType=INSERT, fileIdPrefix=e44b9e93-4589-4407-a6ec-2e9b3b1cd29d, partitionPath=2018/08/31}}, | |
Partition to insert buckets => {2018/08/31=[(InsertBucket {bucketNumber=0, weight=1.0},1.0)]}, | |
UpdateLocations mapped to buckets =>{} | |
2023-05-17 16:56:05,884 INFO timeline.HoodieActiveTimeline: Checking for file exists ?/tmp/deltastreamertest/stocks20230517t165531/.hoodie/20230517165602569.commit.requested | |
2023-05-17 16:56:05,905 INFO timeline.HoodieActiveTimeline: Create new file for toInstant ?/tmp/deltastreamertest/stocks20230517t165531/.hoodie/20230517165602569.inflight | |
2023-05-17 16:56:05,985 INFO commit.BaseSparkCommitActionExecutor: no validators configured. | |
2023-05-17 16:56:05,985 INFO commit.BaseCommitActionExecutor: Auto commit disabled for 20230517165602569 | |
2023-05-17 16:56:06,000 INFO spark.SparkContext: Starting job: sum at DeltaSync.java:696 | |
2023-05-17 16:56:06,003 INFO scheduler.DAGScheduler: Registering RDD 49 (mapToPair at HoodieJavaRDD.java:135) as input to shuffle 9 | |
2023-05-17 16:56:06,003 INFO scheduler.DAGScheduler: Got job 12 (sum at DeltaSync.java:696) with 1 output partitions | |
2023-05-17 16:56:06,003 INFO scheduler.DAGScheduler: Final stage: ResultStage 22 (sum at DeltaSync.java:696) | |
2023-05-17 16:56:06,003 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 21) | |
2023-05-17 16:56:06,004 INFO scheduler.DAGScheduler: Missing parents: List(ShuffleMapStage 21) | |
2023-05-17 16:56:06,005 INFO scheduler.DAGScheduler: Submitting ShuffleMapStage 21 (MapPartitionsRDD[49] at mapToPair at HoodieJavaRDD.java:135), which has no missing parents | |
2023-05-17 16:56:06,049 INFO memory.MemoryStore: Block broadcast_14 stored as values in memory (estimated size 644.9 KiB, free 363.3 MiB) | |
2023-05-17 16:56:06,052 INFO memory.MemoryStore: Block broadcast_14_piece0 stored as bytes in memory (estimated size 241.7 KiB, free 363.1 MiB) | |
2023-05-17 16:56:06,053 INFO storage.BlockManagerInfo: Added broadcast_14_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 241.7 KiB, free: 365.5 MiB) | |
2023-05-17 16:56:06,053 INFO spark.SparkContext: Created broadcast 14 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:06,054 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 21 (MapPartitionsRDD[49] at mapToPair at HoodieJavaRDD.java:135) (first 15 tasks are for partitions Vector(0, 1)) | |
2023-05-17 16:56:06,054 INFO scheduler.TaskSchedulerImpl: Adding task set 21.0 with 2 tasks resource profile 0 | |
2023-05-17 16:56:06,055 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 21.0 (TID 21) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4323 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:06,055 INFO executor.Executor: Running task 0.0 in stage 21.0 (TID 21) | |
2023-05-17 16:56:06,091 INFO storage.BlockManager: Found block rdd_43_0 locally | |
2023-05-17 16:56:06,102 INFO executor.Executor: Finished task 0.0 in stage 21.0 (TID 21). 1072 bytes result sent to driver | |
2023-05-17 16:56:06,102 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 21.0 (TID 22) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, PROCESS_LOCAL, 4323 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:06,103 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 21.0 (TID 21) in 48 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
2023-05-17 16:56:06,103 INFO executor.Executor: Running task 1.0 in stage 21.0 (TID 22) | |
2023-05-17 16:56:06,130 INFO storage.BlockManager: Found block rdd_43_1 locally | |
2023-05-17 16:56:06,139 INFO executor.Executor: Finished task 1.0 in stage 21.0 (TID 22). 1072 bytes result sent to driver | |
2023-05-17 16:56:06,140 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 21.0 (TID 22) in 38 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
2023-05-17 16:56:06,140 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 21.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:06,141 INFO scheduler.DAGScheduler: ShuffleMapStage 21 (mapToPair at HoodieJavaRDD.java:135) finished in 0.134 s | |
2023-05-17 16:56:06,141 INFO scheduler.DAGScheduler: looking for newly runnable stages | |
2023-05-17 16:56:06,141 INFO scheduler.DAGScheduler: running: Set() | |
2023-05-17 16:56:06,141 INFO scheduler.DAGScheduler: waiting: Set(ResultStage 22) | |
2023-05-17 16:56:06,141 INFO scheduler.DAGScheduler: failed: Set() | |
2023-05-17 16:56:06,142 INFO scheduler.DAGScheduler: Submitting ResultStage 22 (MapPartitionsRDD[54] at mapToDouble at DeltaSync.java:696), which has no missing parents | |
2023-05-17 16:56:06,184 INFO memory.MemoryStore: Block broadcast_15 stored as values in memory (estimated size 652.4 KiB, free 362.4 MiB) | |
2023-05-17 16:56:06,188 INFO memory.MemoryStore: Block broadcast_15_piece0 stored as bytes in memory (estimated size 246.0 KiB, free 362.2 MiB) | |
2023-05-17 16:56:06,188 INFO storage.BlockManagerInfo: Added broadcast_15_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 246.0 KiB, free: 365.3 MiB) | |
2023-05-17 16:56:06,188 INFO spark.SparkContext: Created broadcast 15 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:06,189 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 22 (MapPartitionsRDD[54] at mapToDouble at DeltaSync.java:696) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:56:06,189 INFO scheduler.TaskSchedulerImpl: Adding task set 22.0 with 1 tasks resource profile 0 | |
2023-05-17 16:56:06,190 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 22.0 (TID 23) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, NODE_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:06,191 INFO executor.Executor: Running task 0.0 in stage 22.0 (TID 23) | |
2023-05-17 16:56:06,225 INFO storage.ShuffleBlockFetcherIterator: Getting 2 (6.2 KiB) non-empty blocks including 2 (6.2 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:56:06,225 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:56:06,241 INFO queue.SimpleExecutor: Starting consumer, consuming records from the records iterator directly | |
2023-05-17 16:56:06,309 INFO storage.BlockManagerInfo: Removed broadcast_7_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 238.6 KiB, free: 365.5 MiB) | |
2023-05-17 16:56:06,329 WARN marker.WriteMarkersFactory: Timeline-server-based markers are not supported for HDFS: base path /tmp/deltastreamertest/stocks20230517t165531. Falling back to direct markers. | |
2023-05-17 16:56:06,329 INFO storage.BlockManagerInfo: Removed broadcast_10_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 12.1 KiB, free: 365.5 MiB) | |
2023-05-17 16:56:06,333 INFO storage.BlockManagerInfo: Removed broadcast_9_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 3.2 KiB, free: 365.5 MiB) | |
2023-05-17 16:56:06,333 INFO marker.DirectWriteMarkers: Creating Marker Path=/tmp/deltastreamertest/stocks20230517t165531/.hoodie/.temp/20230517165602569/2018/08/31/e44b9e93-4589-4407-a6ec-2e9b3b1cd29d-0_0-22-23_20230517165602569.parquet.marker.CREATE | |
2023-05-17 16:56:06,341 INFO marker.DirectWriteMarkers: [direct] Created marker file /tmp/deltastreamertest/stocks20230517t165531/.hoodie/.temp/20230517165602569/2018/08/31/e44b9e93-4589-4407-a6ec-2e9b3b1cd29d-0_0-22-23_20230517165602569.parquet.marker.CREATE in 11 ms | |
2023-05-17 16:56:06,347 INFO storage.BlockManagerInfo: Removed broadcast_12_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 3.2 KiB, free: 365.5 MiB) | |
2023-05-17 16:56:06,358 INFO storage.BlockManagerInfo: Removed broadcast_13_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 239.3 KiB, free: 365.7 MiB) | |
2023-05-17 16:56:06,365 INFO storage.BlockManagerInfo: Removed broadcast_11_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 5.2 KiB, free: 365.7 MiB) | |
2023-05-17 16:56:06,374 INFO storage.BlockManagerInfo: Removed broadcast_14_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 241.7 KiB, free: 366.0 MiB) | |
2023-05-17 16:56:06,390 INFO storage.BlockManagerInfo: Removed broadcast_8_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 12.8 KiB, free: 366.0 MiB) | |
2023-05-17 16:56:06,432 INFO compress.CodecPool: Got brand-new compressor [.gz] | |
2023-05-17 16:56:06,812 INFO io.HoodieCreateHandle: New CreateHandle for partition :2018/08/31 with fileId e44b9e93-4589-4407-a6ec-2e9b3b1cd29d-0 | |
2023-05-17 16:56:06,873 INFO io.HoodieCreateHandle: Closing the file e44b9e93-4589-4407-a6ec-2e9b3b1cd29d-0 as we are done with all the records 99 | |
2023-05-17 16:56:07,568 INFO io.HoodieCreateHandle: CreateHandle for partitionPath 2018/08/31 fileID e44b9e93-4589-4407-a6ec-2e9b3b1cd29d-0, took 1325 ms. | |
2023-05-17 16:56:07,571 INFO memory.MemoryStore: Block rdd_53_0 stored as values in memory (estimated size 376.0 B, free 364.9 MiB) | |
2023-05-17 16:56:07,571 INFO storage.BlockManagerInfo: Added rdd_53_0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 376.0 B, free: 366.0 MiB) | |
2023-05-17 16:56:07,579 INFO executor.Executor: Finished task 0.0 in stage 22.0 (TID 23). 1146 bytes result sent to driver | |
2023-05-17 16:56:07,580 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 22.0 (TID 23) in 1390 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:56:07,580 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 22.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:07,580 INFO scheduler.DAGScheduler: ResultStage 22 (sum at DeltaSync.java:696) finished in 1.438 s | |
2023-05-17 16:56:07,581 INFO scheduler.DAGScheduler: Job 12 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:56:07,581 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 22: Stage finished | |
2023-05-17 16:56:07,584 INFO scheduler.DAGScheduler: Job 12 finished: sum at DeltaSync.java:696, took 1.583872 s | |
2023-05-17 16:56:07,591 INFO spark.SparkContext: Starting job: sum at DeltaSync.java:697 | |
2023-05-17 16:56:07,594 INFO scheduler.DAGScheduler: Got job 13 (sum at DeltaSync.java:697) with 1 output partitions | |
2023-05-17 16:56:07,594 INFO scheduler.DAGScheduler: Final stage: ResultStage 28 (sum at DeltaSync.java:697) | |
2023-05-17 16:56:07,594 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 27) | |
2023-05-17 16:56:07,594 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:56:07,595 INFO scheduler.DAGScheduler: Submitting ResultStage 28 (MapPartitionsRDD[56] at mapToDouble at DeltaSync.java:697), which has no missing parents | |
2023-05-17 16:56:07,652 INFO memory.MemoryStore: Block broadcast_16 stored as values in memory (estimated size 652.4 KiB, free 364.2 MiB) | |
2023-05-17 16:56:07,655 INFO memory.MemoryStore: Block broadcast_16_piece0 stored as bytes in memory (estimated size 246.0 KiB, free 364.0 MiB) | |
2023-05-17 16:56:07,655 INFO storage.BlockManagerInfo: Added broadcast_16_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 246.0 KiB, free: 365.8 MiB) | |
2023-05-17 16:56:07,656 INFO spark.SparkContext: Created broadcast 16 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:07,656 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 28 (MapPartitionsRDD[56] at mapToDouble at DeltaSync.java:697) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:56:07,657 INFO scheduler.TaskSchedulerImpl: Adding task set 28.0 with 1 tasks resource profile 0 | |
2023-05-17 16:56:07,658 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 28.0 (TID 24) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:07,658 INFO executor.Executor: Running task 0.0 in stage 28.0 (TID 24) | |
2023-05-17 16:56:07,712 INFO storage.BlockManager: Found block rdd_53_0 locally | |
2023-05-17 16:56:07,713 INFO executor.Executor: Finished task 0.0 in stage 28.0 (TID 24). 888 bytes result sent to driver | |
2023-05-17 16:56:07,714 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 28.0 (TID 24) in 56 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:56:07,714 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 28.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:07,715 INFO scheduler.DAGScheduler: ResultStage 28 (sum at DeltaSync.java:697) finished in 0.120 s | |
2023-05-17 16:56:07,716 INFO scheduler.DAGScheduler: Job 13 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:56:07,716 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 28: Stage finished | |
2023-05-17 16:56:07,716 INFO scheduler.DAGScheduler: Job 13 finished: sum at DeltaSync.java:697, took 0.124579 s | |
2023-05-17 16:56:07,833 INFO spark.SparkContext: Starting job: collect at SparkRDDWriteClient.java:101 | |
2023-05-17 16:56:07,835 INFO scheduler.DAGScheduler: Got job 14 (collect at SparkRDDWriteClient.java:101) with 1 output partitions | |
2023-05-17 16:56:07,835 INFO scheduler.DAGScheduler: Final stage: ResultStage 34 (collect at SparkRDDWriteClient.java:101) | |
2023-05-17 16:56:07,835 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 33) | |
2023-05-17 16:56:07,836 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:56:07,836 INFO scheduler.DAGScheduler: Submitting ResultStage 34 (MapPartitionsRDD[58] at map at SparkRDDWriteClient.java:101), which has no missing parents | |
2023-05-17 16:56:07,880 INFO memory.MemoryStore: Block broadcast_17 stored as values in memory (estimated size 652.5 KiB, free 363.4 MiB) | |
2023-05-17 16:56:07,884 INFO memory.MemoryStore: Block broadcast_17_piece0 stored as bytes in memory (estimated size 246.1 KiB, free 363.1 MiB) | |
2023-05-17 16:56:07,884 INFO storage.BlockManagerInfo: Added broadcast_17_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 246.1 KiB, free: 365.5 MiB) | |
2023-05-17 16:56:07,885 INFO spark.SparkContext: Created broadcast 17 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:07,885 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 34 (MapPartitionsRDD[58] at map at SparkRDDWriteClient.java:101) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:56:07,885 INFO scheduler.TaskSchedulerImpl: Adding task set 34.0 with 1 tasks resource profile 0 | |
2023-05-17 16:56:07,887 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 34.0 (TID 25) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:07,887 INFO executor.Executor: Running task 0.0 in stage 34.0 (TID 25) | |
2023-05-17 16:56:07,919 INFO storage.BlockManager: Found block rdd_53_0 locally | |
2023-05-17 16:56:07,920 INFO executor.Executor: Finished task 0.0 in stage 34.0 (TID 25). 1119 bytes result sent to driver | |
2023-05-17 16:56:07,921 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 34.0 (TID 25) in 35 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:56:07,922 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 34.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:07,922 INFO scheduler.DAGScheduler: ResultStage 34 (collect at SparkRDDWriteClient.java:101) finished in 0.085 s | |
2023-05-17 16:56:07,922 INFO scheduler.DAGScheduler: Job 14 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:56:07,923 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 34: Stage finished | |
2023-05-17 16:56:07,923 INFO scheduler.DAGScheduler: Job 14 finished: collect at SparkRDDWriteClient.java:101, took 0.089805 s | |
2023-05-17 16:56:07,923 INFO client.BaseHoodieWriteClient: Committing 20230517165602569 action commit | |
2023-05-17 16:56:07,923 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:07,926 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:07,934 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:07,934 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:07,938 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517165602569__commit__INFLIGHT]} | |
2023-05-17 16:56:07,938 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:07,939 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:07,941 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:07,942 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:07,943 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:07,948 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:07,949 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:56:07,949 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:07,950 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:07,950 INFO view.FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
2023-05-17 16:56:07,950 INFO view.FileSystemViewManager: Creating remote first table view | |
2023-05-17 16:56:07,950 INFO util.CommitUtils: Creating metadata for UPSERT numWriteStats:1 numReplaceFileIds:0 | |
2023-05-17 16:56:07,953 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:07,957 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:07,962 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:07,962 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:07,966 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517165602569__commit__INFLIGHT]} | |
2023-05-17 16:56:07,967 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:07,973 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:07,978 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:07,979 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:07,980 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:07,982 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:07,984 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:56:07,984 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:07,985 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:07,985 INFO view.FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
2023-05-17 16:56:07,985 INFO view.FileSystemViewManager: Creating remote first table view | |
2023-05-17 16:56:07,985 INFO client.BaseHoodieWriteClient: Committing 20230517165602569 action commit | |
2023-05-17 16:56:07,985 WARN marker.WriteMarkersFactory: Timeline-server-based markers are not supported for HDFS: base path /tmp/deltastreamertest/stocks20230517t165531. Falling back to direct markers. | |
2023-05-17 16:56:08,038 INFO spark.SparkContext: Starting job: collect at HoodieSparkEngineContext.java:137 | |
2023-05-17 16:56:08,039 INFO scheduler.DAGScheduler: Got job 15 (collect at HoodieSparkEngineContext.java:137) with 1 output partitions | |
2023-05-17 16:56:08,039 INFO scheduler.DAGScheduler: Final stage: ResultStage 35 (collect at HoodieSparkEngineContext.java:137) | |
2023-05-17 16:56:08,039 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:56:08,039 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:56:08,039 INFO scheduler.DAGScheduler: Submitting ResultStage 35 (MapPartitionsRDD[60] at flatMap at HoodieSparkEngineContext.java:137), which has no missing parents | |
2023-05-17 16:56:08,051 INFO memory.MemoryStore: Block broadcast_18 stored as values in memory (estimated size 148.7 KiB, free 363.0 MiB) | |
2023-05-17 16:56:08,057 INFO memory.MemoryStore: Block broadcast_18_piece0 stored as bytes in memory (estimated size 56.3 KiB, free 362.9 MiB) | |
2023-05-17 16:56:08,057 INFO storage.BlockManagerInfo: Added broadcast_18_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 56.3 KiB, free: 365.5 MiB) | |
2023-05-17 16:56:08,058 INFO spark.SparkContext: Created broadcast 18 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:08,059 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 35 (MapPartitionsRDD[60] at flatMap at HoodieSparkEngineContext.java:137) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:56:08,059 INFO scheduler.TaskSchedulerImpl: Adding task set 35.0 with 1 tasks resource profile 0 | |
2023-05-17 16:56:08,060 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 35.0 (TID 26) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4471 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:08,060 INFO executor.Executor: Running task 0.0 in stage 35.0 (TID 26) | |
2023-05-17 16:56:08,080 INFO executor.Executor: Finished task 0.0 in stage 35.0 (TID 26). 884 bytes result sent to driver | |
2023-05-17 16:56:08,080 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 35.0 (TID 26) in 21 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:56:08,080 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 35.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:08,081 INFO scheduler.DAGScheduler: ResultStage 35 (collect at HoodieSparkEngineContext.java:137) finished in 0.041 s | |
2023-05-17 16:56:08,081 INFO scheduler.DAGScheduler: Job 15 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:56:08,081 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 35: Stage finished | |
2023-05-17 16:56:08,082 INFO scheduler.DAGScheduler: Job 15 finished: collect at HoodieSparkEngineContext.java:137, took 0.043506 s | |
2023-05-17 16:56:08,087 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:08,089 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:08,097 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:08,098 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:08,100 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:08,102 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:08,103 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:56:08,104 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:08,104 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:08,104 INFO metadata.HoodieTableMetadataUtil: Loading latest file slices for metadata table partition files | |
2023-05-17 16:56:08,105 INFO view.AbstractTableFileSystemView: Took 1 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:08,105 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:08,106 INFO view.AbstractTableFileSystemView: Building file system view for partition (files) | |
2023-05-17 16:56:08,117 INFO view.AbstractTableFileSystemView: addFilesToView: NumFiles=1, NumFileGroups=1, FileGroupsCreationTime=5, StoreTimeTaken=0 | |
2023-05-17 16:56:08,125 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:08,128 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:08,131 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:08,132 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:56:08,133 INFO metadata.HoodieBackedTableMetadataWriter: Async metadata indexing enabled and following partitions already initialized: [files] | |
2023-05-17 16:56:08,133 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:08,135 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:08,136 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:08,137 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:08,138 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:08,140 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:08,141 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:56:08,141 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:08,142 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:08,168 INFO metadata.HoodieTableMetadataUtil: Updating at 20230517165602569 from Commit/UPSERT. #partitions_updated=2 | |
2023-05-17 16:56:08,169 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:08,170 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:08,170 INFO metadata.HoodieTableMetadataUtil: Loading latest file slices for metadata table partition files | |
2023-05-17 16:56:08,170 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:08,170 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:08,170 INFO view.AbstractTableFileSystemView: Building file system view for partition (files) | |
2023-05-17 16:56:08,175 INFO view.AbstractTableFileSystemView: addFilesToView: NumFiles=1, NumFileGroups=1, FileGroupsCreationTime=1, StoreTimeTaken=0 | |
2023-05-17 16:56:08,188 INFO client.BaseHoodieClient: Embedded Timeline Server is disabled. Not starting timeline service | |
2023-05-17 16:56:08,222 INFO client.BaseHoodieClient: Embedded Timeline Server is disabled. Not starting timeline service | |
2023-05-17 16:56:08,222 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:08,224 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:08,230 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:08,230 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:08,234 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:56:08,239 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:56:08,239 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:56:08,240 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:08,247 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:08,250 INFO storage.BlockManagerInfo: Removed broadcast_17_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 246.1 KiB, free: 365.7 MiB) | |
2023-05-17 16:56:08,251 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:08,251 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:08,254 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:56:08,255 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:56:08,255 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:56:08,257 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:56:08,259 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517165602569__commit__INFLIGHT]} | |
2023-05-17 16:56:08,262 INFO client.BaseHoodieWriteClient: Scheduling table service COMPACT | |
2023-05-17 16:56:08,262 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:08,264 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:08,270 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:08,271 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:08,277 INFO storage.BlockManagerInfo: Removed broadcast_18_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 56.3 KiB, free: 365.8 MiB) | |
2023-05-17 16:56:08,279 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:56:08,280 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:56:08,280 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:56:08,281 INFO client.BaseHoodieWriteClient: Scheduling compaction at instant time :00000000000000001 | |
2023-05-17 16:56:08,288 INFO compact.ScheduleCompactionActionExecutor: Checking if compaction needs to be run on /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:08,299 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:08,301 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:08,304 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:08,304 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:08,306 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:56:08,306 INFO util.CleanerUtils: Cleaned failed attempts if any | |
2023-05-17 16:56:08,307 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:08,308 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:08,312 INFO storage.BlockManagerInfo: Removed broadcast_16_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 246.0 KiB, free: 366.0 MiB) | |
2023-05-17 16:56:08,313 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:08,313 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:08,314 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
2023-05-17 16:56:08,314 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:56:08,314 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:56:08,315 INFO client.BaseHoodieWriteClient: Generate a new instant time: 20230517165602569 action: deltacommit | |
2023-05-17 16:56:08,315 INFO timeline.HoodieActiveTimeline: Creating a new instant [==>20230517165602569__deltacommit__REQUESTED] | |
2023-05-17 16:56:08,341 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:08,343 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:08,346 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:08,346 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:08,348 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517165602569__deltacommit__REQUESTED]} | |
2023-05-17 16:56:08,349 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:56:08,349 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:56:08,388 INFO async.AsyncCleanerService: The HoodieWriteClient is not configured to auto & async clean. Async clean service will not start. | |
2023-05-17 16:56:08,388 INFO async.AsyncArchiveService: The HoodieWriteClient is not configured to auto & async archive. Async archive service will not start. | |
2023-05-17 16:56:08,389 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:08,390 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:08,408 INFO spark.SparkContext: Starting job: countByKey at HoodieJavaPairRDD.java:105 | |
2023-05-17 16:56:08,410 INFO scheduler.DAGScheduler: Registering RDD 66 (countByKey at HoodieJavaPairRDD.java:105) as input to shuffle 10 | |
2023-05-17 16:56:08,410 INFO scheduler.DAGScheduler: Got job 16 (countByKey at HoodieJavaPairRDD.java:105) with 1 output partitions | |
2023-05-17 16:56:08,410 INFO scheduler.DAGScheduler: Final stage: ResultStage 37 (countByKey at HoodieJavaPairRDD.java:105) | |
2023-05-17 16:56:08,410 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 36) | |
2023-05-17 16:56:08,411 INFO scheduler.DAGScheduler: Missing parents: List(ShuffleMapStage 36) | |
2023-05-17 16:56:08,412 INFO scheduler.DAGScheduler: Submitting ShuffleMapStage 36 (MapPartitionsRDD[66] at countByKey at HoodieJavaPairRDD.java:105), which has no missing parents | |
2023-05-17 16:56:08,415 INFO memory.MemoryStore: Block broadcast_19 stored as values in memory (estimated size 9.6 KiB, free 364.9 MiB) | |
2023-05-17 16:56:08,417 INFO memory.MemoryStore: Block broadcast_19_piece0 stored as bytes in memory (estimated size 5.3 KiB, free 364.9 MiB) | |
2023-05-17 16:56:08,417 INFO storage.BlockManagerInfo: Added broadcast_19_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 5.3 KiB, free: 366.0 MiB) | |
2023-05-17 16:56:08,418 INFO spark.SparkContext: Created broadcast 19 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:08,418 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ShuffleMapStage 36 (MapPartitionsRDD[66] at countByKey at HoodieJavaPairRDD.java:105) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:56:08,418 INFO scheduler.TaskSchedulerImpl: Adding task set 36.0 with 1 tasks resource profile 0 | |
2023-05-17 16:56:08,422 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 36.0 (TID 27) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4687 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:08,422 INFO executor.Executor: Running task 0.0 in stage 36.0 (TID 27) | |
2023-05-17 16:56:08,430 INFO memory.MemoryStore: Block rdd_64_0 stored as values in memory (estimated size 380.0 B, free 364.9 MiB) | |
2023-05-17 16:56:08,430 INFO storage.BlockManagerInfo: Added rdd_64_0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 380.0 B, free: 366.0 MiB) | |
2023-05-17 16:56:08,439 INFO executor.Executor: Finished task 0.0 in stage 36.0 (TID 27). 1115 bytes result sent to driver | |
2023-05-17 16:56:08,440 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 36.0 (TID 27) in 21 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:56:08,440 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 36.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:08,440 INFO scheduler.DAGScheduler: ShuffleMapStage 36 (countByKey at HoodieJavaPairRDD.java:105) finished in 0.027 s | |
2023-05-17 16:56:08,440 INFO scheduler.DAGScheduler: looking for newly runnable stages | |
2023-05-17 16:56:08,441 INFO scheduler.DAGScheduler: running: Set() | |
2023-05-17 16:56:08,441 INFO scheduler.DAGScheduler: waiting: Set(ResultStage 37) | |
2023-05-17 16:56:08,441 INFO scheduler.DAGScheduler: failed: Set() | |
2023-05-17 16:56:08,441 INFO scheduler.DAGScheduler: Submitting ResultStage 37 (ShuffledRDD[67] at countByKey at HoodieJavaPairRDD.java:105), which has no missing parents | |
2023-05-17 16:56:08,443 INFO memory.MemoryStore: Block broadcast_20 stored as values in memory (estimated size 5.5 KiB, free 364.9 MiB) | |
2023-05-17 16:56:08,444 INFO memory.MemoryStore: Block broadcast_20_piece0 stored as bytes in memory (estimated size 3.2 KiB, free 364.9 MiB) | |
2023-05-17 16:56:08,444 INFO storage.BlockManagerInfo: Added broadcast_20_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 3.2 KiB, free: 366.0 MiB) | |
2023-05-17 16:56:08,445 INFO spark.SparkContext: Created broadcast 20 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:08,447 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 37 (ShuffledRDD[67] at countByKey at HoodieJavaPairRDD.java:105) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:56:08,447 INFO scheduler.TaskSchedulerImpl: Adding task set 37.0 with 1 tasks resource profile 0 | |
2023-05-17 16:56:08,448 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 37.0 (TID 28) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, NODE_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:08,448 INFO executor.Executor: Running task 0.0 in stage 37.0 (TID 28) | |
2023-05-17 16:56:08,451 INFO storage.ShuffleBlockFetcherIterator: Getting 1 (117.0 B) non-empty blocks including 1 (117.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:56:08,451 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:56:08,460 INFO executor.Executor: Finished task 0.0 in stage 37.0 (TID 28). 1303 bytes result sent to driver | |
2023-05-17 16:56:08,461 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 37.0 (TID 28) in 13 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:56:08,461 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 37.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:08,462 INFO scheduler.DAGScheduler: ResultStage 37 (countByKey at HoodieJavaPairRDD.java:105) finished in 0.020 s | |
2023-05-17 16:56:08,462 INFO scheduler.DAGScheduler: Job 16 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:56:08,462 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 37: Stage finished | |
2023-05-17 16:56:08,463 INFO scheduler.DAGScheduler: Job 16 finished: countByKey at HoodieJavaPairRDD.java:105, took 0.054246 s | |
2023-05-17 16:56:08,463 INFO commit.BaseSparkCommitActionExecutor: Input workload profile :WorkloadProfile {globalStat=WorkloadStat {numInserts=0, numUpdates=2}, InputPartitionStat={files=WorkloadStat {numInserts=0, numUpdates=2}}, OutputPartitionStat={}, operationType=UPSERT_PREPPED} | |
2023-05-17 16:56:08,463 INFO commit.UpsertPartitioner: AvgRecordSize => 1024 | |
2023-05-17 16:56:08,566 INFO spark.SparkContext: Starting job: collectAsMap at UpsertPartitioner.java:279 | |
2023-05-17 16:56:08,566 INFO scheduler.DAGScheduler: Got job 17 (collectAsMap at UpsertPartitioner.java:279) with 1 output partitions | |
2023-05-17 16:56:08,566 INFO scheduler.DAGScheduler: Final stage: ResultStage 38 (collectAsMap at UpsertPartitioner.java:279) | |
2023-05-17 16:56:08,566 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:56:08,567 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:56:08,567 INFO scheduler.DAGScheduler: Submitting ResultStage 38 (MapPartitionsRDD[69] at mapToPair at UpsertPartitioner.java:278), which has no missing parents | |
2023-05-17 16:56:08,597 INFO memory.MemoryStore: Block broadcast_21 stored as values in memory (estimated size 491.5 KiB, free 364.4 MiB) | |
2023-05-17 16:56:08,600 INFO memory.MemoryStore: Block broadcast_21_piece0 stored as bytes in memory (estimated size 183.3 KiB, free 364.2 MiB) | |
2023-05-17 16:56:08,600 INFO storage.BlockManagerInfo: Added broadcast_21_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 183.3 KiB, free: 365.8 MiB) | |
2023-05-17 16:56:08,601 INFO spark.SparkContext: Created broadcast 21 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:08,601 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 38 (MapPartitionsRDD[69] at mapToPair at UpsertPartitioner.java:278) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:56:08,601 INFO scheduler.TaskSchedulerImpl: Adding task set 38.0 with 1 tasks resource profile 0 | |
2023-05-17 16:56:08,602 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 38.0 (TID 29) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4339 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:08,602 INFO executor.Executor: Running task 0.0 in stage 38.0 (TID 29) | |
2023-05-17 16:56:08,628 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:56:08,628 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:56:08,629 INFO view.FileSystemViewManager: Creating InMemory based view for basePath /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:08,629 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:08,631 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:08,631 INFO view.AbstractTableFileSystemView: Building file system view for partition (files) | |
2023-05-17 16:56:08,632 INFO view.AbstractTableFileSystemView: addFilesToView: NumFiles=1, NumFileGroups=1, FileGroupsCreationTime=0, StoreTimeTaken=0 | |
2023-05-17 16:56:08,638 INFO executor.Executor: Finished task 0.0 in stage 38.0 (TID 29). 829 bytes result sent to driver | |
2023-05-17 16:56:08,639 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 38.0 (TID 29) in 37 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:56:08,639 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 38.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:08,639 INFO scheduler.DAGScheduler: ResultStage 38 (collectAsMap at UpsertPartitioner.java:279) finished in 0.071 s | |
2023-05-17 16:56:08,640 INFO scheduler.DAGScheduler: Job 17 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:56:08,640 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 38: Stage finished | |
2023-05-17 16:56:08,640 INFO scheduler.DAGScheduler: Job 17 finished: collectAsMap at UpsertPartitioner.java:279, took 0.073850 s | |
2023-05-17 16:56:08,640 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:08,641 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:08,641 INFO commit.UpsertPartitioner: Total Buckets :1, buckets info => {0=BucketInfo {bucketType=UPDATE, fileIdPrefix=files-0000, partitionPath=files}}, | |
Partition to insert buckets => {}, | |
UpdateLocations mapped to buckets =>{files-0000=0} | |
2023-05-17 16:56:08,641 INFO timeline.HoodieActiveTimeline: Checking for file exists ?/tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/20230517165602569.deltacommit.requested | |
2023-05-17 16:56:08,651 INFO util.FileIOUtils: Created a new file in meta path: /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/20230517165602569.deltacommit.inflight | |
2023-05-17 16:56:09,073 INFO timeline.HoodieActiveTimeline: Create new file for toInstant ?/tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/20230517165602569.deltacommit.inflight | |
2023-05-17 16:56:09,118 INFO commit.BaseSparkCommitActionExecutor: no validators configured. | |
2023-05-17 16:56:09,118 INFO commit.BaseCommitActionExecutor: Auto commit enabled: Committing 20230517165602569 | |
2023-05-17 16:56:09,203 INFO spark.SparkContext: Starting job: collect at HoodieJavaRDD.java:163 | |
2023-05-17 16:56:09,204 INFO scheduler.DAGScheduler: Registering RDD 70 (mapToPair at HoodieJavaRDD.java:135) as input to shuffle 11 | |
2023-05-17 16:56:09,204 INFO scheduler.DAGScheduler: Got job 18 (collect at HoodieJavaRDD.java:163) with 1 output partitions | |
2023-05-17 16:56:09,204 INFO scheduler.DAGScheduler: Final stage: ResultStage 40 (collect at HoodieJavaRDD.java:163) | |
2023-05-17 16:56:09,204 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 39) | |
2023-05-17 16:56:09,205 INFO scheduler.DAGScheduler: Missing parents: List(ShuffleMapStage 39) | |
2023-05-17 16:56:09,206 INFO scheduler.DAGScheduler: Submitting ShuffleMapStage 39 (MapPartitionsRDD[70] at mapToPair at HoodieJavaRDD.java:135), which has no missing parents | |
2023-05-17 16:56:09,237 INFO memory.MemoryStore: Block broadcast_22 stored as values in memory (estimated size 495.8 KiB, free 363.7 MiB) | |
2023-05-17 16:56:09,240 INFO memory.MemoryStore: Block broadcast_22_piece0 stored as bytes in memory (estimated size 185.0 KiB, free 363.5 MiB) | |
2023-05-17 16:56:09,240 INFO storage.BlockManagerInfo: Added broadcast_22_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 185.0 KiB, free: 365.6 MiB) | |
2023-05-17 16:56:09,241 INFO spark.SparkContext: Created broadcast 22 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:09,241 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ShuffleMapStage 39 (MapPartitionsRDD[70] at mapToPair at HoodieJavaRDD.java:135) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:56:09,241 INFO scheduler.TaskSchedulerImpl: Adding task set 39.0 with 1 tasks resource profile 0 | |
2023-05-17 16:56:09,242 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 39.0 (TID 30) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4687 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:09,243 INFO executor.Executor: Running task 0.0 in stage 39.0 (TID 30) | |
2023-05-17 16:56:09,258 INFO storage.BlockManager: Found block rdd_64_0 locally | |
2023-05-17 16:56:09,263 INFO executor.Executor: Finished task 0.0 in stage 39.0 (TID 30). 1072 bytes result sent to driver | |
2023-05-17 16:56:09,264 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 39.0 (TID 30) in 22 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:56:09,264 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 39.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:09,264 INFO scheduler.DAGScheduler: ShuffleMapStage 39 (mapToPair at HoodieJavaRDD.java:135) finished in 0.057 s | |
2023-05-17 16:56:09,264 INFO scheduler.DAGScheduler: looking for newly runnable stages | |
2023-05-17 16:56:09,264 INFO scheduler.DAGScheduler: running: Set() | |
2023-05-17 16:56:09,264 INFO scheduler.DAGScheduler: waiting: Set(ResultStage 40) | |
2023-05-17 16:56:09,264 INFO scheduler.DAGScheduler: failed: Set() | |
2023-05-17 16:56:09,265 INFO scheduler.DAGScheduler: Submitting ResultStage 40 (MapPartitionsRDD[75] at map at HoodieJavaRDD.java:111), which has no missing parents | |
2023-05-17 16:56:09,318 INFO memory.MemoryStore: Block broadcast_23 stored as values in memory (estimated size 645.7 KiB, free 362.9 MiB) | |
2023-05-17 16:56:09,321 INFO memory.MemoryStore: Block broadcast_23_piece0 stored as bytes in memory (estimated size 242.4 KiB, free 362.7 MiB) | |
2023-05-17 16:56:09,321 INFO storage.BlockManagerInfo: Added broadcast_23_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 242.4 KiB, free: 365.4 MiB) | |
2023-05-17 16:56:09,322 INFO spark.SparkContext: Created broadcast 23 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:09,322 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 40 (MapPartitionsRDD[75] at map at HoodieJavaRDD.java:111) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:56:09,322 INFO scheduler.TaskSchedulerImpl: Adding task set 40.0 with 1 tasks resource profile 0 | |
2023-05-17 16:56:09,323 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 40.0 (TID 31) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, NODE_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:09,323 INFO executor.Executor: Running task 0.0 in stage 40.0 (TID 31) | |
2023-05-17 16:56:09,344 INFO storage.ShuffleBlockFetcherIterator: Getting 1 (334.0 B) non-empty blocks including 1 (334.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
2023-05-17 16:56:09,344 INFO storage.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
2023-05-17 16:56:09,357 INFO deltacommit.BaseSparkDeltaCommitActionExecutor: Merging updates for commit 20230517165602569 for file files-0000 | |
2023-05-17 16:56:09,369 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:56:09,369 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:56:09,369 INFO view.FileSystemViewManager: Creating InMemory based view for basePath /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:09,369 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:09,370 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:09,370 INFO view.AbstractTableFileSystemView: Building file system view for partition (files) | |
2023-05-17 16:56:09,371 INFO view.AbstractTableFileSystemView: addFilesToView: NumFiles=1, NumFileGroups=1, FileGroupsCreationTime=0, StoreTimeTaken=0 | |
# WARNING: Unable to attach Serviceability Agent. Unable to attach even with module exceptions: [org.apache.hudi.org.openjdk.jol.vm.sa.SASupportException: Sense failed., org.apache.hudi.org.openjdk.jol.vm.sa.SASupportException: Sense failed., org.apache.hudi.org.openjdk.jol.vm.sa.SASupportException: Sense failed.] | |
2023-05-17 16:56:10,627 INFO marker.DirectWriteMarkers: Creating Marker Path=/tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/.temp/20230517165602569/files/files-0000_0-40-31_00000000000000.hfile.marker.APPEND | |
2023-05-17 16:56:10,632 INFO marker.DirectWriteMarkers: [direct] Created marker file /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/.temp/20230517165602569/files/files-0000_0-40-31_00000000000000.hfile.marker.APPEND in 8 ms | |
2023-05-17 16:56:10,634 INFO log.HoodieLogFormat$WriterBuilder: Building HoodieLogFormat Writer | |
2023-05-17 16:56:10,634 INFO log.HoodieLogFormat$WriterBuilder: HoodieLogFile on path /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0 | |
2023-05-17 16:56:10,753 INFO log.HoodieLogFormatWriter: HoodieLogFile{pathStr='/tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0', fileLen=124} exists. Appending to existing file | |
2023-05-17 16:56:10,934 INFO storage.BlockManagerInfo: Removed broadcast_21_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 183.3 KiB, free: 365.6 MiB) | |
2023-05-17 16:56:10,946 INFO storage.BlockManagerInfo: Removed broadcast_22_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 185.0 KiB, free: 365.7 MiB) | |
2023-05-17 16:56:10,950 INFO storage.BlockManagerInfo: Removed broadcast_20_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 3.2 KiB, free: 365.8 MiB) | |
2023-05-17 16:56:10,954 INFO storage.BlockManagerInfo: Removed broadcast_19_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 5.3 KiB, free: 365.8 MiB) | |
2023-05-17 16:56:11,032 INFO impl.MetricsConfig: Loaded properties from hadoop-metrics2.properties | |
2023-05-17 16:56:11,043 INFO impl.MetricsSystemImpl: Scheduled Metric snapshot period at 300 second(s). | |
2023-05-17 16:56:11,043 INFO impl.MetricsSystemImpl: HBase metrics system started | |
2023-05-17 16:56:11,074 INFO metrics.MetricRegistries: Loaded MetricRegistries class org.apache.hudi.org.apache.hadoop.hbase.metrics.impl.MetricRegistriesImpl | |
2023-05-17 16:56:11,111 INFO compress.CodecPool: Got brand-new compressor [.gz] | |
2023-05-17 16:56:11,114 INFO compress.CodecPool: Got brand-new compressor [.gz] | |
2023-05-17 16:56:11,257 INFO io.HoodieAppendHandle: AppendHandle for partitionPath files filePath files/.files-0000_00000000000000.log.1_0-0-0, took 1891 ms. | |
2023-05-17 16:56:11,666 INFO memory.MemoryStore: Block rdd_74_0 stored as values in memory (estimated size 381.0 B, free 364.0 MiB) | |
2023-05-17 16:56:11,666 INFO storage.BlockManagerInfo: Added rdd_74_0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 381.0 B, free: 365.8 MiB) | |
2023-05-17 16:56:11,671 INFO executor.Executor: Finished task 0.0 in stage 40.0 (TID 31). 1558 bytes result sent to driver | |
2023-05-17 16:56:11,672 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 40.0 (TID 31) in 2349 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:56:11,672 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 40.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:11,673 INFO scheduler.DAGScheduler: ResultStage 40 (collect at HoodieJavaRDD.java:163) finished in 2.407 s | |
2023-05-17 16:56:11,673 INFO scheduler.DAGScheduler: Job 18 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:56:11,674 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 40: Stage finished | |
2023-05-17 16:56:11,674 INFO scheduler.DAGScheduler: Job 18 finished: collect at HoodieJavaRDD.java:163, took 2.470821 s | |
2023-05-17 16:56:11,675 INFO util.CommitUtils: Creating metadata for UPSERT_PREPPED numWriteStats:1 numReplaceFileIds:0 | |
2023-05-17 16:56:11,760 INFO spark.SparkContext: Starting job: collect at HoodieJavaRDD.java:163 | |
2023-05-17 16:56:11,761 INFO scheduler.DAGScheduler: Got job 19 (collect at HoodieJavaRDD.java:163) with 1 output partitions | |
2023-05-17 16:56:11,761 INFO scheduler.DAGScheduler: Final stage: ResultStage 42 (collect at HoodieJavaRDD.java:163) | |
2023-05-17 16:56:11,761 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 41) | |
2023-05-17 16:56:11,761 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:56:11,762 INFO scheduler.DAGScheduler: Submitting ResultStage 42 (MapPartitionsRDD[76] at map at HoodieJavaRDD.java:111), which has no missing parents | |
2023-05-17 16:56:11,802 INFO memory.MemoryStore: Block broadcast_24 stored as values in memory (estimated size 645.7 KiB, free 363.4 MiB) | |
2023-05-17 16:56:11,806 INFO memory.MemoryStore: Block broadcast_24_piece0 stored as bytes in memory (estimated size 242.4 KiB, free 363.1 MiB) | |
2023-05-17 16:56:11,806 INFO storage.BlockManagerInfo: Added broadcast_24_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 242.4 KiB, free: 365.5 MiB) | |
2023-05-17 16:56:11,806 INFO spark.SparkContext: Created broadcast 24 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:11,807 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 42 (MapPartitionsRDD[76] at map at HoodieJavaRDD.java:111) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:56:11,807 INFO scheduler.TaskSchedulerImpl: Adding task set 42.0 with 1 tasks resource profile 0 | |
2023-05-17 16:56:11,808 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 42.0 (TID 32) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:11,808 INFO executor.Executor: Running task 0.0 in stage 42.0 (TID 32) | |
2023-05-17 16:56:11,828 INFO storage.BlockManager: Found block rdd_74_0 locally | |
2023-05-17 16:56:11,829 INFO executor.Executor: Finished task 0.0 in stage 42.0 (TID 32). 1128 bytes result sent to driver | |
2023-05-17 16:56:11,830 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 42.0 (TID 32) in 22 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:56:11,830 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 42.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:11,830 INFO scheduler.DAGScheduler: ResultStage 42 (collect at HoodieJavaRDD.java:163) finished in 0.068 s | |
2023-05-17 16:56:11,830 INFO scheduler.DAGScheduler: Job 19 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:56:11,830 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 42: Stage finished | |
2023-05-17 16:56:11,831 INFO scheduler.DAGScheduler: Job 19 finished: collect at HoodieJavaRDD.java:163, took 0.070618 s | |
2023-05-17 16:56:11,831 INFO commit.BaseSparkCommitActionExecutor: Committing 20230517165602569, action Type deltacommit, operation Type UPSERT_PREPPED | |
2023-05-17 16:56:11,872 INFO spark.SparkContext: Starting job: collect at HoodieSparkEngineContext.java:137 | |
2023-05-17 16:56:11,873 INFO scheduler.DAGScheduler: Got job 20 (collect at HoodieSparkEngineContext.java:137) with 1 output partitions | |
2023-05-17 16:56:11,873 INFO scheduler.DAGScheduler: Final stage: ResultStage 43 (collect at HoodieSparkEngineContext.java:137) | |
2023-05-17 16:56:11,873 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:56:11,873 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:56:11,873 INFO scheduler.DAGScheduler: Submitting ResultStage 43 (MapPartitionsRDD[78] at flatMap at HoodieSparkEngineContext.java:137), which has no missing parents | |
2023-05-17 16:56:11,888 INFO memory.MemoryStore: Block broadcast_25 stored as values in memory (estimated size 148.7 KiB, free 363.0 MiB) | |
2023-05-17 16:56:11,890 INFO memory.MemoryStore: Block broadcast_25_piece0 stored as bytes in memory (estimated size 56.3 KiB, free 362.9 MiB) | |
2023-05-17 16:56:11,890 INFO storage.BlockManagerInfo: Added broadcast_25_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 56.3 KiB, free: 365.5 MiB) | |
2023-05-17 16:56:11,890 INFO spark.SparkContext: Created broadcast 25 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:11,891 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 43 (MapPartitionsRDD[78] at flatMap at HoodieSparkEngineContext.java:137) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:56:11,891 INFO scheduler.TaskSchedulerImpl: Adding task set 43.0 with 1 tasks resource profile 0 | |
2023-05-17 16:56:11,892 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 43.0 (TID 33) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4489 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:11,892 INFO executor.Executor: Running task 0.0 in stage 43.0 (TID 33) | |
2023-05-17 16:56:11,903 INFO executor.Executor: Finished task 0.0 in stage 43.0 (TID 33). 797 bytes result sent to driver | |
2023-05-17 16:56:11,904 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 43.0 (TID 33) in 12 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:56:11,904 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 43.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:11,904 INFO scheduler.DAGScheduler: ResultStage 43 (collect at HoodieSparkEngineContext.java:137) finished in 0.030 s | |
2023-05-17 16:56:11,905 INFO scheduler.DAGScheduler: Job 20 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:56:11,905 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 43: Stage finished | |
2023-05-17 16:56:11,905 INFO scheduler.DAGScheduler: Job 20 finished: collect at HoodieSparkEngineContext.java:137, took 0.032686 s | |
2023-05-17 16:56:11,914 INFO timeline.HoodieActiveTimeline: Marking instant complete [==>20230517165602569__deltacommit__INFLIGHT] | |
2023-05-17 16:56:11,915 INFO timeline.HoodieActiveTimeline: Checking for file exists ?/tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/20230517165602569.deltacommit.inflight | |
2023-05-17 16:56:11,934 INFO timeline.HoodieActiveTimeline: Create new file for toInstant ?/tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/20230517165602569.deltacommit | |
2023-05-17 16:56:11,934 INFO timeline.HoodieActiveTimeline: Completed [==>20230517165602569__deltacommit__INFLIGHT] | |
2023-05-17 16:56:11,935 INFO commit.BaseSparkCommitActionExecutor: Committed 20230517165602569 | |
2023-05-17 16:56:12,000 INFO spark.SparkContext: Starting job: collectAsMap at HoodieSparkEngineContext.java:151 | |
2023-05-17 16:56:12,001 INFO scheduler.DAGScheduler: Got job 21 (collectAsMap at HoodieSparkEngineContext.java:151) with 1 output partitions | |
2023-05-17 16:56:12,001 INFO scheduler.DAGScheduler: Final stage: ResultStage 44 (collectAsMap at HoodieSparkEngineContext.java:151) | |
2023-05-17 16:56:12,001 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:56:12,001 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:56:12,001 INFO scheduler.DAGScheduler: Submitting ResultStage 44 (MapPartitionsRDD[80] at mapToPair at HoodieSparkEngineContext.java:148), which has no missing parents | |
2023-05-17 16:56:12,013 INFO memory.MemoryStore: Block broadcast_26 stored as values in memory (estimated size 148.9 KiB, free 362.8 MiB) | |
2023-05-17 16:56:12,014 INFO memory.MemoryStore: Block broadcast_26_piece0 stored as bytes in memory (estimated size 56.4 KiB, free 362.7 MiB) | |
2023-05-17 16:56:12,015 INFO storage.BlockManagerInfo: Added broadcast_26_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 56.4 KiB, free: 365.4 MiB) | |
2023-05-17 16:56:12,015 INFO spark.SparkContext: Created broadcast 26 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:12,016 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 44 (MapPartitionsRDD[80] at mapToPair at HoodieSparkEngineContext.java:148) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:56:12,016 INFO scheduler.TaskSchedulerImpl: Adding task set 44.0 with 1 tasks resource profile 0 | |
2023-05-17 16:56:12,017 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 44.0 (TID 34) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4489 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:12,017 INFO executor.Executor: Running task 0.0 in stage 44.0 (TID 34) | |
2023-05-17 16:56:12,029 INFO executor.Executor: Finished task 0.0 in stage 44.0 (TID 34). 958 bytes result sent to driver | |
2023-05-17 16:56:12,030 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 44.0 (TID 34) in 13 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:56:12,030 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 44.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:12,030 INFO scheduler.DAGScheduler: ResultStage 44 (collectAsMap at HoodieSparkEngineContext.java:151) finished in 0.028 s | |
2023-05-17 16:56:12,031 INFO scheduler.DAGScheduler: Job 21 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:56:12,031 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 44: Stage finished | |
2023-05-17 16:56:12,031 INFO scheduler.DAGScheduler: Job 21 finished: collectAsMap at HoodieSparkEngineContext.java:151, took 0.030421 s | |
2023-05-17 16:56:12,033 INFO fs.FSUtils: Removed directory at /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/.temp/20230517165602569 | |
2023-05-17 16:56:12,033 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:12,034 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:12,037 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:12,037 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:12,038 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__deltacommit__COMPLETED]} | |
2023-05-17 16:56:12,039 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:56:12,039 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:56:12,122 INFO spark.SparkContext: Starting job: collect at SparkHoodieBackedTableMetadataWriter.java:185 | |
2023-05-17 16:56:12,123 INFO scheduler.DAGScheduler: Got job 22 (collect at SparkHoodieBackedTableMetadataWriter.java:185) with 1 output partitions | |
2023-05-17 16:56:12,123 INFO scheduler.DAGScheduler: Final stage: ResultStage 46 (collect at SparkHoodieBackedTableMetadataWriter.java:185) | |
2023-05-17 16:56:12,123 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 45) | |
2023-05-17 16:56:12,123 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:56:12,123 INFO scheduler.DAGScheduler: Submitting ResultStage 46 (MapPartitionsRDD[74] at flatMap at BaseSparkCommitActionExecutor.java:255), which has no missing parents | |
2023-05-17 16:56:12,164 INFO memory.MemoryStore: Block broadcast_27 stored as values in memory (estimated size 645.3 KiB, free 362.1 MiB) | |
2023-05-17 16:56:12,168 INFO memory.MemoryStore: Block broadcast_27_piece0 stored as bytes in memory (estimated size 242.3 KiB, free 361.9 MiB) | |
2023-05-17 16:56:12,168 INFO storage.BlockManagerInfo: Added broadcast_27_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 242.3 KiB, free: 365.2 MiB) | |
2023-05-17 16:56:12,168 INFO spark.SparkContext: Created broadcast 27 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:12,169 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 46 (MapPartitionsRDD[74] at flatMap at BaseSparkCommitActionExecutor.java:255) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:56:12,169 INFO scheduler.TaskSchedulerImpl: Adding task set 46.0 with 1 tasks resource profile 0 | |
2023-05-17 16:56:12,170 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 46.0 (TID 35) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:12,170 INFO executor.Executor: Running task 0.0 in stage 46.0 (TID 35) | |
2023-05-17 16:56:12,220 INFO storage.BlockManager: Found block rdd_74_0 locally | |
2023-05-17 16:56:12,221 INFO executor.Executor: Finished task 0.0 in stage 46.0 (TID 35). 1267 bytes result sent to driver | |
2023-05-17 16:56:12,222 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 46.0 (TID 35) in 52 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:56:12,222 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 46.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:12,223 INFO scheduler.DAGScheduler: ResultStage 46 (collect at SparkHoodieBackedTableMetadataWriter.java:185) finished in 0.099 s | |
2023-05-17 16:56:12,223 INFO scheduler.DAGScheduler: Job 22 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:56:12,223 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 46: Stage finished | |
2023-05-17 16:56:12,223 INFO scheduler.DAGScheduler: Job 22 finished: collect at SparkHoodieBackedTableMetadataWriter.java:185, took 0.101144 s | |
2023-05-17 16:56:12,225 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__deltacommit__COMPLETED]} | |
2023-05-17 16:56:12,226 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__deltacommit__COMPLETED]} | |
2023-05-17 16:56:12,227 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:12,229 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:12,229 INFO storage.BlockManagerInfo: Removed broadcast_24_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 242.4 KiB, free: 365.4 MiB) | |
2023-05-17 16:56:12,231 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:12,231 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:12,233 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__deltacommit__COMPLETED]} | |
2023-05-17 16:56:12,233 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:56:12,233 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:56:12,233 INFO client.BaseHoodieWriteClient: Cleaner started | |
2023-05-17 16:56:12,234 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:12,235 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:12,238 INFO storage.BlockManagerInfo: Removed broadcast_23_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 242.4 KiB, free: 365.6 MiB) | |
2023-05-17 16:56:12,238 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:12,238 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:12,239 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__deltacommit__COMPLETED]} | |
2023-05-17 16:56:12,240 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:56:12,240 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:56:12,240 INFO client.BaseHoodieWriteClient: Scheduling cleaning at instant time :20230517165602569002 | |
2023-05-17 16:56:12,247 INFO storage.BlockManagerInfo: Removed broadcast_26_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 56.4 KiB, free: 365.7 MiB) | |
2023-05-17 16:56:12,247 INFO view.FileSystemViewManager: Creating InMemory based view for basePath /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:12,248 INFO view.AbstractTableFileSystemView: Took 1 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:12,248 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:12,250 INFO clean.CleanPlanner: No earliest commit to retain. No need to scan partitions !! | |
2023-05-17 16:56:12,251 INFO clean.CleanPlanner: Nothing to clean here. It is already clean | |
2023-05-17 16:56:12,254 INFO storage.BlockManagerInfo: Removed broadcast_25_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 56.3 KiB, free: 365.8 MiB) | |
2023-05-17 16:56:12,259 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__deltacommit__COMPLETED]} | |
2023-05-17 16:56:12,261 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:12,262 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:12,264 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:12,264 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:12,265 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__deltacommit__COMPLETED]} | |
2023-05-17 16:56:12,266 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:56:12,266 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:56:12,267 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:12,268 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:12,270 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:12,270 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:12,271 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__deltacommit__COMPLETED]} | |
2023-05-17 16:56:12,271 INFO view.FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
2023-05-17 16:56:12,271 INFO view.FileSystemViewManager: Creating in-memory based Table View | |
2023-05-17 16:56:12,279 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__deltacommit__COMPLETED]} | |
2023-05-17 16:56:12,280 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:12,281 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:12,282 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:12,283 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517165602569__commit__INFLIGHT]} | |
2023-05-17 16:56:12,286 INFO client.HoodieTimelineArchiver: No Instants to archive | |
2023-05-17 16:56:12,286 INFO timeline.HoodieActiveTimeline: Marking instant complete [==>20230517165602569__commit__INFLIGHT] | |
2023-05-17 16:56:12,286 INFO timeline.HoodieActiveTimeline: Checking for file exists ?/tmp/deltastreamertest/stocks20230517t165531/.hoodie/20230517165602569.inflight | |
2023-05-17 16:56:12,701 INFO timeline.HoodieActiveTimeline: Create new file for toInstant ?/tmp/deltastreamertest/stocks20230517t165531/.hoodie/20230517165602569.commit | |
2023-05-17 16:56:12,701 INFO timeline.HoodieActiveTimeline: Completed [==>20230517165602569__commit__INFLIGHT] | |
2023-05-17 16:56:12,701 WARN marker.WriteMarkersFactory: Timeline-server-based markers are not supported for HDFS: base path /tmp/deltastreamertest/stocks20230517t165531. Falling back to direct markers. | |
2023-05-17 16:56:12,739 INFO spark.SparkContext: Starting job: collectAsMap at HoodieSparkEngineContext.java:151 | |
2023-05-17 16:56:12,740 INFO scheduler.DAGScheduler: Got job 23 (collectAsMap at HoodieSparkEngineContext.java:151) with 1 output partitions | |
2023-05-17 16:56:12,740 INFO scheduler.DAGScheduler: Final stage: ResultStage 47 (collectAsMap at HoodieSparkEngineContext.java:151) | |
2023-05-17 16:56:12,740 INFO scheduler.DAGScheduler: Parents of final stage: List() | |
2023-05-17 16:56:12,740 INFO scheduler.DAGScheduler: Missing parents: List() | |
2023-05-17 16:56:12,740 INFO scheduler.DAGScheduler: Submitting ResultStage 47 (MapPartitionsRDD[82] at mapToPair at HoodieSparkEngineContext.java:148), which has no missing parents | |
2023-05-17 16:56:12,754 INFO memory.MemoryStore: Block broadcast_28 stored as values in memory (estimated size 148.9 KiB, free 363.9 MiB) | |
2023-05-17 16:56:12,755 INFO memory.MemoryStore: Block broadcast_28_piece0 stored as bytes in memory (estimated size 56.4 KiB, free 363.8 MiB) | |
2023-05-17 16:56:12,756 INFO storage.BlockManagerInfo: Added broadcast_28_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:38325 (size: 56.4 KiB, free: 365.7 MiB) | |
2023-05-17 16:56:12,756 INFO spark.SparkContext: Created broadcast 28 from broadcast at DAGScheduler.scala:1474 | |
2023-05-17 16:56:12,757 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 47 (MapPartitionsRDD[82] at mapToPair at HoodieSparkEngineContext.java:148) (first 15 tasks are for partitions Vector(0)) | |
2023-05-17 16:56:12,757 INFO scheduler.TaskSchedulerImpl: Adding task set 47.0 with 1 tasks resource profile 0 | |
2023-05-17 16:56:12,758 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 47.0 (TID 36) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4471 bytes) taskResourceAssignments Map() | |
2023-05-17 16:56:12,758 INFO executor.Executor: Running task 0.0 in stage 47.0 (TID 36) | |
2023-05-17 16:56:12,766 INFO executor.Executor: Finished task 0.0 in stage 47.0 (TID 36). 940 bytes result sent to driver | |
2023-05-17 16:56:12,767 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 47.0 (TID 36) in 9 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
2023-05-17 16:56:12,767 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 47.0, whose tasks have all completed, from pool | |
2023-05-17 16:56:12,768 INFO scheduler.DAGScheduler: ResultStage 47 (collectAsMap at HoodieSparkEngineContext.java:151) finished in 0.026 s | |
2023-05-17 16:56:12,768 INFO scheduler.DAGScheduler: Job 23 is finished. Cancelling potential speculative or zombie tasks for this job | |
2023-05-17 16:56:12,768 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 47: Stage finished | |
2023-05-17 16:56:12,768 INFO scheduler.DAGScheduler: Job 23 finished: collectAsMap at HoodieSparkEngineContext.java:151, took 0.028467 s | |
2023-05-17 16:56:12,770 INFO fs.FSUtils: Removed directory at /tmp/deltastreamertest/stocks20230517t165531/.hoodie/.temp/20230517165602569 | |
2023-05-17 16:56:12,770 INFO client.BaseHoodieWriteClient: Committed 20230517165602569 | |
2023-05-17 16:56:12,776 INFO rdd.MapPartitionsRDD: Removing RDD 74 from persistence list | |
2023-05-17 16:56:12,777 INFO rdd.MapPartitionsRDD: Removing RDD 53 from persistence list | |
2023-05-17 16:56:12,777 INFO storage.BlockManager: Removing RDD 74 | |
2023-05-17 16:56:12,778 INFO storage.BlockManager: Removing RDD 53 | |
2023-05-17 16:56:12,778 INFO rdd.UnionRDD: Removing RDD 64 from persistence list | |
2023-05-17 16:56:12,779 INFO storage.BlockManager: Removing RDD 64 | |
2023-05-17 16:56:12,779 INFO rdd.MapPartitionsRDD: Removing RDD 43 from persistence list | |
2023-05-17 16:56:12,779 INFO storage.BlockManager: Removing RDD 43 | |
2023-05-17 16:56:12,779 INFO client.BaseHoodieWriteClient: Start to clean synchronously. | |
2023-05-17 16:56:12,780 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:12,781 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:12,783 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:12,783 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:12,784 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__commit__COMPLETED]} | |
2023-05-17 16:56:12,784 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:12,785 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:12,787 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:12,787 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:12,788 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:12,790 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:12,791 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__deltacommit__COMPLETED]} | |
2023-05-17 16:56:12,791 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:12,792 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:12,792 INFO view.FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
2023-05-17 16:56:12,792 INFO view.FileSystemViewManager: Creating remote first table view | |
2023-05-17 16:56:12,792 INFO client.BaseHoodieWriteClient: Cleaner started | |
2023-05-17 16:56:12,792 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:12,793 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:12,794 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:12,794 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:12,795 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__commit__COMPLETED]} | |
2023-05-17 16:56:12,795 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:12,796 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:12,798 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:12,798 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:12,799 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:12,800 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:12,801 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__deltacommit__COMPLETED]} | |
2023-05-17 16:56:12,801 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:12,801 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:12,801 INFO view.FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
2023-05-17 16:56:12,801 INFO view.FileSystemViewManager: Creating remote first table view | |
2023-05-17 16:56:12,801 INFO client.BaseHoodieWriteClient: Scheduling cleaning at instant time :20230517165612779 | |
2023-05-17 16:56:12,802 INFO view.FileSystemViewManager: Creating remote view for basePath /tmp/deltastreamertest/stocks20230517t165531. Server=ip-172-31-19-77.us-east-2.compute.internal:35375, Timeout=300 | |
2023-05-17 16:56:12,802 INFO view.FileSystemViewManager: Creating InMemory based view for basePath /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:12,802 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:12,803 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:12,814 INFO view.RemoteHoodieTableFileSystemView: Sending request : (http://ip-172-31-19-77.us-east-2.compute.internal:35375/v1/hoodie/view/compactions/pending/?basepath=%2Ftmp%2Fdeltastreamertest%2Fstocks20230517t165531&lastinstantts=20230517165602569&timelinehash=98834bc23bd46f9c95bb2fcef88d05bfdabf03e704dc2549eb9df5a3bdac983c) | |
2023-05-17 16:56:13,333 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:13,341 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:13,344 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:13,344 INFO view.FileSystemViewManager: Creating InMemory based view for basePath /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:13,345 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__commit__COMPLETED]} | |
2023-05-17 16:56:13,346 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:13,347 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:13,349 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:13,349 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:13,351 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:13,352 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:13,354 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__deltacommit__COMPLETED]} | |
2023-05-17 16:56:13,355 INFO view.AbstractTableFileSystemView: Took 1 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:13,355 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:13,355 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:13,357 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:13,428 INFO view.RemoteHoodieTableFileSystemView: Sending request : (http://ip-172-31-19-77.us-east-2.compute.internal:35375/v1/hoodie/view/logcompactions/pending/?basepath=%2Ftmp%2Fdeltastreamertest%2Fstocks20230517t165531&lastinstantts=20230517165602569&timelinehash=98834bc23bd46f9c95bb2fcef88d05bfdabf03e704dc2549eb9df5a3bdac983c) | |
2023-05-17 16:56:13,446 INFO clean.CleanPlanner: No earliest commit to retain. No need to scan partitions !! | |
2023-05-17 16:56:13,447 INFO clean.CleanPlanner: Nothing to clean here. It is already clean | |
2023-05-17 16:56:13,450 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__commit__COMPLETED]} | |
2023-05-17 16:56:13,453 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:13,456 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:13,463 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:13,463 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:13,465 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__commit__COMPLETED]} | |
2023-05-17 16:56:13,465 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:13,466 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:13,469 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:13,469 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:13,472 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:13,474 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:13,482 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__deltacommit__COMPLETED]} | |
2023-05-17 16:56:13,483 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:13,485 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:13,485 INFO view.FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
2023-05-17 16:56:13,485 INFO view.FileSystemViewManager: Creating remote first table view | |
2023-05-17 16:56:13,485 INFO client.BaseHoodieWriteClient: Start to archive synchronously. | |
2023-05-17 16:56:13,490 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__commit__COMPLETED]} | |
2023-05-17 16:56:13,490 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:13,491 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:13,493 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:13,493 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:13,495 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:13,496 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:13,498 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__deltacommit__COMPLETED]} | |
2023-05-17 16:56:13,498 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:13,499 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:13,499 INFO client.HoodieTimelineArchiver: Not archiving as there is no compaction yet on the metadata table | |
2023-05-17 16:56:13,499 INFO client.HoodieTimelineArchiver: No Instants to archive | |
2023-05-17 16:56:13,499 INFO view.FileSystemViewManager: Creating remote view for basePath /tmp/deltastreamertest/stocks20230517t165531. Server=ip-172-31-19-77.us-east-2.compute.internal:35375, Timeout=300 | |
2023-05-17 16:56:13,499 INFO view.FileSystemViewManager: Creating InMemory based view for basePath /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:13,500 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:13,501 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:13,502 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__commit__COMPLETED]} | |
2023-05-17 16:56:13,503 INFO view.RemoteHoodieTableFileSystemView: Sending request : (http://ip-172-31-19-77.us-east-2.compute.internal:35375/v1/hoodie/view/refresh/?basepath=%2Ftmp%2Fdeltastreamertest%2Fstocks20230517t165531&lastinstantts=20230517165602569&timelinehash=98834bc23bd46f9c95bb2fcef88d05bfdabf03e704dc2549eb9df5a3bdac983c) | |
2023-05-17 16:56:13,510 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__commit__COMPLETED]} | |
2023-05-17 16:56:13,510 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:13,511 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:13,512 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__commit__COMPLETED]} | |
2023-05-17 16:56:13,514 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__deltacommit__COMPLETED]} | |
2023-05-17 16:56:13,514 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:13,515 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:13,515 INFO deltastreamer.DeltaSync: Commit 20230517165602569 successful! | |
2023-05-17 16:56:13,620 INFO conf.HiveConf: Found configuration file file:/home/hadoop/spark-3.2.4-bin-hadoop3.2/conf/hive-site.xml | |
2023-05-17 16:56:13,749 WARN conf.HiveConf: HiveConf of name hive.server2.thrift.url does not exist | |
2023-05-17 16:56:13,756 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:13,758 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:13,760 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:13,760 INFO table.HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:13,761 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__commit__COMPLETED]} | |
2023-05-17 16:56:13,811 INFO hive.metastore: Trying to connect to metastore with URI thrift://ip-172-31-19-77.us-east-2.compute.internal:9083 | |
2023-05-17 16:56:13,827 INFO hive.metastore: Opened a connection to metastore, current connections: 1 | |
2023-05-17 16:56:13,850 INFO hive.metastore: Connected to metastore. | |
2023-05-17 16:56:13,908 INFO hive.HiveSyncTool: Syncing target hoodie table with hive table(default.stocks20230517t165531). Hive metastore URL from HiveConf:thrift://ip-172-31-19-77.us-east-2.compute.internal:9083). Hive metastore URL from HiveSyncConfig:null, basePath :/tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:13,909 INFO hive.HiveSyncTool: Trying to sync hoodie table stocks20230517t165531 with base path /tmp/deltastreamertest/stocks20230517t165531 of type COPY_ON_WRITE | |
2023-05-17 16:56:13,935 INFO table.TableSchemaResolver: Reading schema from /tmp/deltastreamertest/stocks20230517t165531/2018/08/31/e44b9e93-4589-4407-a6ec-2e9b3b1cd29d-0_0-22-23_20230517165602569.parquet | |
2023-05-17 16:56:13,974 INFO storage.BlockManager: Removing RDD 74 | |
2023-05-17 16:56:13,980 INFO storage.BlockManagerInfo: Removed broadcast_28_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 56.4 KiB, free: 365.8 MiB) | |
2023-05-17 16:56:13,985 INFO storage.BlockManager: Removing RDD 64 | |
2023-05-17 16:56:13,988 INFO storage.BlockManagerInfo: Removed broadcast_27_piece0 on ip-172-31-19-77.us-east-2.compute.internal:38325 in memory (size: 242.3 KiB, free: 366.0 MiB) | |
2023-05-17 16:56:13,992 INFO hive.HiveSyncTool: Hive table stocks20230517t165531 is not found. Creating it with schema message stock_ticks { | |
optional binary _hoodie_commit_time (STRING); | |
optional binary _hoodie_commit_seqno (STRING); | |
optional binary _hoodie_record_key (STRING); | |
optional binary _hoodie_partition_path (STRING); | |
optional binary _hoodie_file_name (STRING); | |
required int64 volume; | |
required binary ts (STRING); | |
required binary symbol (STRING); | |
required int32 year; | |
required binary month (STRING); | |
required double high; | |
required double low; | |
required binary key (STRING); | |
required binary date (STRING); | |
required double close; | |
required double open; | |
required binary day (STRING); | |
} | |
2023-05-17 16:56:14,087 INFO hive.HoodieHiveSyncClient: No comment difference of stocks20230517t165531 | |
2023-05-17 16:56:14,088 INFO hive.HiveSyncTool: Schema sync complete. Syncing partitions for stocks20230517t165531 | |
2023-05-17 16:56:14,088 INFO hive.HiveSyncTool: Last commit time synced was found to be null | |
2023-05-17 16:56:14,088 INFO hive.HiveSyncTool: Sync all partitions given the last commit time synced is empty or before the start of the active timeline. Listing all partitions in /tmp/deltastreamertest/stocks20230517t165531, file system: DFS[DFSClient[clientName=DFSClient_NONMAPREDUCE_1774343305_1, ugi=hadoop (auth:SIMPLE)]] | |
2023-05-17 16:56:14,104 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:14,106 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/hoodie.properties | |
2023-05-17 16:56:14,108 INFO table.HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t165531 | |
2023-05-17 16:56:14,108 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:14,109 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:14,110 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:14,111 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__deltacommit__COMPLETED]} | |
2023-05-17 16:56:14,111 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:14,112 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:14,112 INFO metadata.HoodieTableMetadataUtil: Loading latest merged file slices for metadata table partition files | |
2023-05-17 16:56:14,112 INFO view.AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
2023-05-17 16:56:14,112 INFO util.ClusteringUtils: Found 0 files in pending clustering operations | |
2023-05-17 16:56:14,112 INFO view.AbstractTableFileSystemView: Building file system view for partition (files) | |
2023-05-17 16:56:14,113 INFO view.AbstractTableFileSystemView: addFilesToView: NumFiles=2, NumFileGroups=1, FileGroupsCreationTime=0, StoreTimeTaken=0 | |
2023-05-17 16:56:14,116 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__commit__COMPLETED]} | |
2023-05-17 16:56:14,129 INFO table.HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:14,130 INFO table.HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/.hoodie/hoodie.properties | |
2023-05-17 16:56:14,132 INFO table.HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata | |
2023-05-17 16:56:14,140 INFO timeline.HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517165602569__deltacommit__COMPLETED]} | |
2023-05-17 16:56:14,148 INFO log.AbstractHoodieLogRecordReader: Scanning log file HoodieLogFile{pathStr='hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0', fileLen=-1} | |
2023-05-17 16:56:14,152 INFO log.AbstractHoodieLogRecordReader: Reading a delete block from file hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0 | |
2023-05-17 16:56:14,152 INFO log.AbstractHoodieLogRecordReader: Scanning log file HoodieLogFile{pathStr='hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0', fileLen=-1} | |
2023-05-17 16:56:14,153 INFO log.AbstractHoodieLogRecordReader: Reading a data block from file hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/deltastreamertest/stocks20230517t165531/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0 at instant 20230517165602569 | |
2023-05-17 16:56:14,153 INFO log.AbstractHoodieLogRecordReader: Merging the final data blocks | |
2023-05-17 16:56:14,153 INFO log.AbstractHoodieLogRecordReader: Number of remaining logblocks to merge 2 | |
2023-05-17 16:56:14,155 INFO log.AbstractHoodieLogRecordReader: Number of remaining logblocks to merge 1 | |
2023-05-17 16:56:14,241 INFO compress.CodecPool: Got brand-new decompressor [.gz] | |
2023-05-17 16:56:14,244 INFO compress.CodecPool: Got brand-new decompressor [.gz] | |
2023-05-17 16:56:14,244 INFO compress.CodecPool: Got brand-new decompressor [.gz] | |
2023-05-17 16:56:14,299 INFO compress.CodecPool: Got brand-new decompressor [.gz] | |
2023-05-17 16:56:14,313 INFO log.HoodieMergedLogRecordScanner: Number of log files scanned => 1 | |
2023-05-17 16:56:14,313 INFO log.HoodieMergedLogRecordScanner: MaxMemoryInBytes allowed for compaction => 1073741824 | |
2023-05-17 16:56:14,313 INFO log.HoodieMergedLogRecordScanner: Number of entries in MemoryBasedMap in ExternalSpillableMap => 2 | |
2023-05-17 16:56:14,313 INFO log.HoodieMergedLogRecordScanner: Total size in bytes of MemoryBasedMap in ExternalSpillableMap => 1248 | |
2023-05-17 16:56:14,313 INFO log.HoodieMergedLogRecordScanner: Number of entries in BitCaskDiskMap in ExternalSpillableMap => 0 | |
2023-05-17 16:56:14,313 INFO log.HoodieMergedLogRecordScanner: Size of file spilled to disk => 0 | |
2023-05-17 16:56:14,313 INFO metadata.HoodieBackedTableMetadata: Opened 1 metadata log files (dataset instant=20230517165602569, metadata instant=20230517165602569) in 198 ms | |
2023-05-17 16:56:14,321 INFO metadata.BaseTableMetadata: Listed partitions from metadata: #partitions=1 | |
2023-05-17 16:56:14,323 INFO hive.HiveSyncTool: New Partitions [2018/08/31] | |
2023-05-17 16:56:14,323 INFO ddl.HMSDDLExecutor: Adding partitions 1 to table stocks20230517t165531 | |
2023-05-17 16:56:14,365 INFO ddl.HMSDDLExecutor: HMSDDLExecutor add a batch partitions done: 1 | |
2023-05-17 16:56:14,400 INFO hive.HiveSyncTool: Sync complete for stocks20230517t165531 | |
2023-05-17 16:56:14,404 INFO hive.metastore: Closed a connection to metastore, current connections: 0 | |
2023-05-17 16:56:14,404 INFO deltastreamer.DeltaSync: Shutting down embedded timeline server | |
2023-05-17 16:56:14,405 INFO embedded.EmbeddedTimelineService: Closing Timeline server | |
2023-05-17 16:56:14,405 INFO service.TimelineService: Closing Timeline Service | |
2023-05-17 16:56:14,405 INFO javalin.Javalin: Stopping Javalin ... | |
2023-05-17 16:56:14,417 INFO javalin.Javalin: Javalin has stopped | |
2023-05-17 16:56:14,417 INFO service.TimelineService: Closed Timeline Service | |
2023-05-17 16:56:14,417 INFO embedded.EmbeddedTimelineService: Closed Timeline server | |
2023-05-17 16:56:14,418 INFO deltastreamer.HoodieDeltaStreamer: Shut down delta streamer | |
2023-05-17 16:56:14,425 INFO server.AbstractConnector: Stopped Spark@4fe875be{HTTP/1.1, (http/1.1)}{0.0.0.0:8090} | |
2023-05-17 16:56:14,427 INFO ui.SparkUI: Stopped Spark web UI at http://ip-172-31-19-77.us-east-2.compute.internal:8090 | |
2023-05-17 16:56:14,438 INFO spark.MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped! | |
2023-05-17 16:56:14,456 INFO memory.MemoryStore: MemoryStore cleared | |
2023-05-17 16:56:14,457 INFO storage.BlockManager: BlockManager stopped | |
2023-05-17 16:56:14,462 INFO storage.BlockManagerMaster: BlockManagerMaster stopped | |
2023-05-17 16:56:14,465 INFO scheduler.OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped! | |
2023-05-17 16:56:14,470 INFO spark.SparkContext: Successfully stopped SparkContext | |
2023-05-17 16:56:14,472 INFO util.ShutdownHookManager: Shutdown hook called | |
2023-05-17 16:56:14,472 INFO util.ShutdownHookManager: Deleting directory /mnt/tmp/spark-d4b7394b-6fc9-4bba-8204-48d645c92fd3 | |
2023-05-17 16:56:14,475 INFO util.ShutdownHookManager: Deleting directory /mnt/tmp/spark-34f281ee-0421-41e9-a6a1-111b3ce9c37a |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment