Last active
May 17, 2023 16:17
-
-
Save ad1happy2go/7ce9b1243c098df40b4719d9000c2fee to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Stage the stocks test dataset from S3 onto the cluster filesystem, then run
# the Hudi DeltaStreamer (0.13.0, Spark 3.3 bundles) as a single-round UPSERT
# into a COPY_ON_WRITE table, with HMS-mode Hive sync enabled.
set -euo pipefail

# Copy the Avro schema and the JSON source directory into HDFS /tmp.
hadoop fs -cp s3://rxusandbox-us-west-2/testcases/stocks/data/schema.avsc /tmp/
hadoop fs -cp s3://rxusandbox-us-west-2/testcases/stocks/data/source /tmp/source_parquet

# Timestamp suffix keeps each run's table name and base path unique
# (format matches the log, e.g. stocks20230517t161102).
NOW=$(date '+%Y%m%dt%H%M%S')
readonly NOW

# NOTE(review): spark-submit runs with --master local while the application is
# passed --spark-master yarn below; the captured log shows the driver actually
# ran locally — confirm which master is intended.
bin/spark-submit --master local --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer \
  --jars /home/hadoop/v_3.3/hudi-hive-sync-bundle-0.13.0.jar,/home/hadoop/v_3.3/hudi-spark3.3-bundle_2.12-0.13.0.jar \
  /home/hadoop/v_3.3/hudi-utilities-slim-bundle_2.12-0.13.0.jar \
  --target-base-path "/tmp/deltastreamertest/stocks${NOW}" \
  --target-table "stocks${NOW}" --table-type COPY_ON_WRITE --base-file-format PARQUET \
  --source-class org.apache.hudi.utilities.sources.JsonDFSSource \
  --source-ordering-field ts --payload-class org.apache.hudi.common.model.DefaultHoodieRecordPayload \
  --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider \
  --hoodie-conf hoodie.deltastreamer.schemaprovider.source.schema.file=/tmp/schema.avsc \
  --hoodie-conf hoodie.deltastreamer.schemaprovider.target.schema.file=/tmp/schema.avsc \
  --op UPSERT --enable-sync --spark-master yarn \
  --hoodie-conf hoodie.deltastreamer.source.dfs.root=/tmp/source_parquet \
  --hoodie-conf hoodie.datasource.write.recordkey.field=symbol \
  --hoodie-conf hoodie.datasource.write.partitionpath.field=date --hoodie-conf hoodie.datasource.write.precombine.field=ts \
  --hoodie-conf hoodie.datasource.write.keygenerator.type=SIMPLE --hoodie-conf hoodie.datasource.write.hive_style_partitioning=false \
  --hoodie-conf hoodie.metadata.enable=true \
  --hoodie-conf hoodie.datasource.hive_sync.mode=hms \
  --hoodie-conf hoodie.datasource.hive_sync.skip_ro_suffix=true \
  --hoodie-conf hoodie.datasource.hive_sync.ignore_exceptions=false \
  --hoodie-conf hoodie.datasource.hive_sync.auto_create_database=true \
  --hoodie-conf hoodie.datasource.hive_sync.database=default \
  --hoodie-conf hoodie.datasource.hive_sync.partition_fields=date \
  --hoodie-conf hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor \
  --hoodie-conf hoodie.datasource.hive_sync.sync_as_datasource=true --hoodie-conf hoodie.datasource.hive_sync.sync_comment=true
=======LOGS========== | |
[hadoop@ip-172-31-19-77 spark-3.3.2-bin-hadoop3]$ bin/spark-submit --master local --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer \ | |
> --jars /home/hadoop//v_3.3/hudi-hive-sync-bundle-0.13.0.jar,/home/hadoop/v_3.3/hudi-spark3.3-bundle_2.12-0.13.0.jar \ | |
> /home/hadoop/v_3.3/hudi-utilities-slim-bundle_2.12-0.13.0.jar \ | |
> --target-base-path /tmp/deltastreamertest/stocks${NOW} \ | |
> --target-table stocks${NOW} --table-type COPY_ON_WRITE --base-file-format PARQUET \ | |
> --source-class org.apache.hudi.utilities.sources.JsonDFSSource \ | |
> --source-ordering-field ts --payload-class org.apache.hudi.common.model.DefaultHoodieRecordPayload \ | |
> --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider \ | |
> --hoodie-conf hoodie.deltastreamer.schemaprovider.source.schema.file=/tmp/schema.avsc \ | |
> --hoodie-conf hoodie.deltastreamer.schemaprovider.target.schema.file=/tmp/schema.avsc \ | |
> --op UPSERT --enable-sync --spark-master yarn \ | |
> --hoodie-conf hoodie.deltastreamer.source.dfs.root=/tmp/source_parquet \ | |
> --hoodie-conf hoodie.datasource.write.recordkey.field=symbol \ | |
> --hoodie-conf hoodie.datasource.write.partitionpath.field=date --hoodie-conf hoodie.datasource.write.precombine.field=ts \ | |
> --hoodie-conf hoodie.datasource.write.keygenerator.type=SIMPLE --hoodie-conf hoodie.datasource.write.hive_style_partitioning=false \ | |
> --hoodie-conf hoodie.metadata.enable=true \ | |
> --hoodie-conf hoodie.datasource.hive_sync.mode=hms \ | |
> --hoodie-conf hoodie.datasource.hive_sync.skip_ro_suffix=true \ | |
> --hoodie-conf hoodie.datasource.hive_sync.ignore_exceptions=false \ | |
> --hoodie-conf hoodie.datasource.hive_sync.auto_create_database=true \ | |
> --hoodie-conf hoodie.datasource.hive_sync.database=default \ | |
> --hoodie-conf hoodie.datasource.hive_sync.partition_fields=date \ | |
> --hoodie-conf hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor \ | |
> --hoodie-conf hoodie.datasource.hive_sync.sync_as_datasource=true --hoodie-conf hoodie.datasource.hive_sync.sync_comment=true | |
SLF4J: Class path contains multiple SLF4J bindings. | |
SLF4J: Found binding in [jar:file:/home/hadoop/spark-3.3.2-bin-hadoop3/jars/log4j-slf4j-impl-2.17.2.jar!/org/slf4j/impl/StaticLoggerBinder.class] | |
SLF4J: Found binding in [jar:file:/usr/lib/hadoop/lib/slf4j-reload4j-1.7.36.jar!/org/slf4j/impl/StaticLoggerBinder.class] | |
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation. | |
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory] | |
23/05/17 16:14:48 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable | |
23/05/17 16:14:49 WARN SchedulerConfGenerator: Job Scheduling Configs will not be in effect as spark.scheduler.mode is not set to FAIR at instantiation time. Continuing without scheduling configs | |
23/05/17 16:14:49 INFO SparkContext: Running Spark version 3.3.2 | |
23/05/17 16:14:49 INFO ResourceUtils: ============================================================== | |
23/05/17 16:14:49 INFO ResourceUtils: No custom resources configured for spark.driver. | |
23/05/17 16:14:49 INFO ResourceUtils: ============================================================== | |
23/05/17 16:14:49 INFO SparkContext: Submitted application: delta-streamer-stocks20230517t161102 | |
23/05/17 16:14:49 INFO ResourceProfile: Default ResourceProfile created, executor resources: Map(cores -> name: cores, amount: 1, script: , vendor: , memory -> name: memory, amount: 1024, script: , vendor: , offHeap -> name: offHeap, amount: 0, script: , vendor: ), task resources: Map(cpus -> name: cpus, amount: 1.0) | |
23/05/17 16:14:49 INFO ResourceProfile: Limiting resource is cpu | |
23/05/17 16:14:49 INFO ResourceProfileManager: Added ResourceProfile id: 0 | |
23/05/17 16:14:49 INFO SecurityManager: Changing view acls to: hadoop | |
23/05/17 16:14:49 INFO SecurityManager: Changing modify acls to: hadoop | |
23/05/17 16:14:49 INFO SecurityManager: Changing view acls groups to: | |
23/05/17 16:14:49 INFO SecurityManager: Changing modify acls groups to: | |
23/05/17 16:14:49 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(hadoop); groups with view permissions: Set(); users with modify permissions: Set(hadoop); groups with modify permissions: Set() | |
23/05/17 16:14:49 INFO deprecation: mapred.output.compression.codec is deprecated. Instead, use mapreduce.output.fileoutputformat.compress.codec | |
23/05/17 16:14:49 INFO deprecation: mapred.output.compress is deprecated. Instead, use mapreduce.output.fileoutputformat.compress | |
23/05/17 16:14:49 INFO deprecation: mapred.output.compression.type is deprecated. Instead, use mapreduce.output.fileoutputformat.compress.type | |
23/05/17 16:14:49 INFO Utils: Successfully started service 'sparkDriver' on port 37769. | |
23/05/17 16:14:49 INFO SparkEnv: Registering MapOutputTracker | |
23/05/17 16:14:49 INFO SparkEnv: Registering BlockManagerMaster | |
23/05/17 16:14:49 INFO BlockManagerMasterEndpoint: Using org.apache.spark.storage.DefaultTopologyMapper for getting topology information | |
23/05/17 16:14:49 INFO BlockManagerMasterEndpoint: BlockManagerMasterEndpoint up | |
23/05/17 16:14:49 INFO SparkEnv: Registering BlockManagerMasterHeartbeat | |
23/05/17 16:14:49 INFO DiskBlockManager: Created local directory at /mnt/tmp/blockmgr-c922dd92-ec97-4b3e-b96a-7cd85673cc98 | |
23/05/17 16:14:49 INFO MemoryStore: MemoryStore started with capacity 366.3 MiB | |
23/05/17 16:14:49 INFO SparkEnv: Registering OutputCommitCoordinator | |
23/05/17 16:14:50 INFO Utils: Successfully started service 'SparkUI' on port 8090. | |
23/05/17 16:14:50 INFO SparkContext: Added JAR file:///home/hadoop/v_3.3/hudi-hive-sync-bundle-0.13.0.jar at spark://ip-172-31-19-77.us-east-2.compute.internal:37769/jars/hudi-hive-sync-bundle-0.13.0.jar with timestamp 1684340089194 | |
23/05/17 16:14:50 INFO SparkContext: Added JAR file:///home/hadoop/v_3.3/hudi-spark3.3-bundle_2.12-0.13.0.jar at spark://ip-172-31-19-77.us-east-2.compute.internal:37769/jars/hudi-spark3.3-bundle_2.12-0.13.0.jar with timestamp 1684340089194 | |
23/05/17 16:14:50 INFO SparkContext: Added JAR file:/home/hadoop/v_3.3/hudi-utilities-slim-bundle_2.12-0.13.0.jar at spark://ip-172-31-19-77.us-east-2.compute.internal:37769/jars/hudi-utilities-slim-bundle_2.12-0.13.0.jar with timestamp 1684340089194 | |
23/05/17 16:14:50 INFO Executor: Starting executor ID driver on host ip-172-31-19-77.us-east-2.compute.internal | |
23/05/17 16:14:50 INFO Executor: Starting executor with user classpath (userClassPathFirst = false): '' | |
23/05/17 16:14:50 INFO Executor: Fetching spark://ip-172-31-19-77.us-east-2.compute.internal:37769/jars/hudi-hive-sync-bundle-0.13.0.jar with timestamp 1684340089194 | |
23/05/17 16:14:50 INFO TransportClientFactory: Successfully created connection to ip-172-31-19-77.us-east-2.compute.internal/172.31.19.77:37769 after 37 ms (0 ms spent in bootstraps) | |
23/05/17 16:14:50 INFO Utils: Fetching spark://ip-172-31-19-77.us-east-2.compute.internal:37769/jars/hudi-hive-sync-bundle-0.13.0.jar to /mnt/tmp/spark-8a4988c1-389e-45a2-b3de-88bd96f1eba1/userFiles-19d3579a-8f5d-4179-9b8c-7841d50f091a/fetchFileTemp826132836941859285.tmp | |
23/05/17 16:14:50 INFO Executor: Adding file:/mnt/tmp/spark-8a4988c1-389e-45a2-b3de-88bd96f1eba1/userFiles-19d3579a-8f5d-4179-9b8c-7841d50f091a/hudi-hive-sync-bundle-0.13.0.jar to class loader | |
23/05/17 16:14:50 INFO Executor: Fetching spark://ip-172-31-19-77.us-east-2.compute.internal:37769/jars/hudi-spark3.3-bundle_2.12-0.13.0.jar with timestamp 1684340089194 | |
23/05/17 16:14:50 INFO Utils: Fetching spark://ip-172-31-19-77.us-east-2.compute.internal:37769/jars/hudi-spark3.3-bundle_2.12-0.13.0.jar to /mnt/tmp/spark-8a4988c1-389e-45a2-b3de-88bd96f1eba1/userFiles-19d3579a-8f5d-4179-9b8c-7841d50f091a/fetchFileTemp6053686297156611149.tmp | |
23/05/17 16:14:50 INFO Executor: Adding file:/mnt/tmp/spark-8a4988c1-389e-45a2-b3de-88bd96f1eba1/userFiles-19d3579a-8f5d-4179-9b8c-7841d50f091a/hudi-spark3.3-bundle_2.12-0.13.0.jar to class loader | |
23/05/17 16:14:50 INFO Executor: Fetching spark://ip-172-31-19-77.us-east-2.compute.internal:37769/jars/hudi-utilities-slim-bundle_2.12-0.13.0.jar with timestamp 1684340089194 | |
23/05/17 16:14:50 INFO Utils: Fetching spark://ip-172-31-19-77.us-east-2.compute.internal:37769/jars/hudi-utilities-slim-bundle_2.12-0.13.0.jar to /mnt/tmp/spark-8a4988c1-389e-45a2-b3de-88bd96f1eba1/userFiles-19d3579a-8f5d-4179-9b8c-7841d50f091a/fetchFileTemp8138300703154979138.tmp | |
23/05/17 16:14:50 INFO Executor: Adding file:/mnt/tmp/spark-8a4988c1-389e-45a2-b3de-88bd96f1eba1/userFiles-19d3579a-8f5d-4179-9b8c-7841d50f091a/hudi-utilities-slim-bundle_2.12-0.13.0.jar to class loader | |
23/05/17 16:14:50 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 35347. | |
23/05/17 16:14:50 INFO NettyBlockTransferService: Server created on ip-172-31-19-77.us-east-2.compute.internal:35347 | |
23/05/17 16:14:50 INFO BlockManager: Using org.apache.spark.storage.RandomBlockReplicationPolicy for block replication policy | |
23/05/17 16:14:50 INFO BlockManagerMaster: Registering BlockManager BlockManagerId(driver, ip-172-31-19-77.us-east-2.compute.internal, 35347, None) | |
23/05/17 16:14:50 INFO BlockManagerMasterEndpoint: Registering block manager ip-172-31-19-77.us-east-2.compute.internal:35347 with 366.3 MiB RAM, BlockManagerId(driver, ip-172-31-19-77.us-east-2.compute.internal, 35347, None) | |
23/05/17 16:14:50 INFO BlockManagerMaster: Registered BlockManager BlockManagerId(driver, ip-172-31-19-77.us-east-2.compute.internal, 35347, None) | |
23/05/17 16:14:50 INFO BlockManager: Initialized BlockManager: BlockManagerId(driver, ip-172-31-19-77.us-east-2.compute.internal, 35347, None) | |
23/05/17 16:14:51 WARN DFSPropertiesConfiguration: Cannot find HUDI_CONF_DIR, please set it as the dir of hudi-defaults.conf | |
23/05/17 16:14:51 INFO UtilHelpers: Adding overridden properties to file properties. | |
23/05/17 16:14:51 WARN SparkContext: Using an existing SparkContext; some configuration may not take effect. | |
23/05/17 16:14:51 INFO HoodieDeltaStreamer: Creating delta streamer with configs: | |
hoodie.auto.adjust.lock.configs: true | |
hoodie.cleaner.policy.failed.writes: EAGER | |
hoodie.datasource.hive_sync.auto_create_database: true | |
hoodie.datasource.hive_sync.database: default | |
hoodie.datasource.hive_sync.ignore_exceptions: false | |
hoodie.datasource.hive_sync.jdbcurl: jdbc:hive2://ip-172-31-19-77.us-east-2.compute.internal:10000 | |
hoodie.datasource.hive_sync.mode: hms | |
hoodie.datasource.hive_sync.partition_extractor_class: org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor | |
hoodie.datasource.hive_sync.partition_fields: date | |
hoodie.datasource.hive_sync.skip_ro_suffix: true | |
hoodie.datasource.hive_sync.sync_as_datasource: true | |
hoodie.datasource.hive_sync.sync_comment: true | |
hoodie.datasource.write.hive_style_partitioning: false | |
hoodie.datasource.write.keygenerator.type: SIMPLE | |
hoodie.datasource.write.partitionpath.field: date | |
hoodie.datasource.write.precombine.field: ts | |
hoodie.datasource.write.reconcile.schema: false | |
hoodie.datasource.write.recordkey.field: symbol | |
hoodie.deltastreamer.schemaprovider.source.schema.file: /tmp/schema.avsc | |
hoodie.deltastreamer.schemaprovider.target.schema.file: /tmp/schema.avsc | |
hoodie.deltastreamer.source.dfs.root: /tmp/source_parquet | |
hoodie.index.type: BLOOM | |
hoodie.metadata.enable: true | |
hoodie.write.concurrency.mode: single_writer | |
hoodie.write.lock.provider: org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider | |
hoodie.write.lock.zookeeper.base_path: /hudi | |
hoodie.write.lock.zookeeper.port: 2181 | |
hoodie.write.lock.zookeeper.url: ip-172-31-19-77.us-east-2.compute.internal | |
23/05/17 16:14:51 INFO FSUtils: Resolving file /tmp/schema.avscto be a remote file. | |
23/05/17 16:14:51 INFO HoodieSparkKeyGeneratorFactory: The value of hoodie.datasource.write.keygenerator.type is empty, use SIMPLE | |
23/05/17 16:14:51 INFO HoodieTableMetaClient: Initializing /tmp/deltastreamertest/stocks20230517t161102 as hoodie table /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:14:51 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:14:51 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:14:51 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:14:51 INFO HoodieTableMetaClient: Finished initializing Table of type COPY_ON_WRITE from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:14:51 INFO DFSPathSelector: Using path selector org.apache.hudi.utilities.sources.helpers.DFSPathSelector | |
23/05/17 16:14:51 INFO HoodieDeltaStreamer: Delta Streamer running only single round | |
23/05/17 16:14:51 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:14:51 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:14:51 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:14:51 INFO HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
23/05/17 16:14:51 INFO DeltaSync: Checkpoint to resume from : Optional.empty | |
23/05/17 16:14:52 INFO DFSPathSelector: Root path => /tmp/source_parquet source limit => 9223372036854775807 | |
23/05/17 16:14:52 INFO MemoryStore: Block broadcast_0 stored as values in memory (estimated size 501.9 KiB, free 365.8 MiB) | |
23/05/17 16:14:52 INFO MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 53.9 KiB, free 365.8 MiB) | |
23/05/17 16:14:52 INFO BlockManagerInfo: Added broadcast_0_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 53.9 KiB, free: 366.2 MiB) | |
23/05/17 16:14:52 INFO SparkContext: Created broadcast 0 from textFile at JsonDFSSource.java:54 | |
23/05/17 16:14:52 ERROR GPLNativeCodeLoader: Could not load native gpl library | |
java.lang.UnsatisfiedLinkError: no gplcompression in java.library.path | |
at java.lang.ClassLoader.loadLibrary(ClassLoader.java:1860) | |
at java.lang.Runtime.loadLibrary0(Runtime.java:843) | |
at java.lang.System.loadLibrary(System.java:1136) | |
at com.hadoop.compression.lzo.GPLNativeCodeLoader.<clinit>(GPLNativeCodeLoader.java:32) | |
at com.hadoop.compression.lzo.LzoCodec.<clinit>(LzoCodec.java:71) | |
at java.lang.Class.forName0(Native Method) | |
at java.lang.Class.forName(Class.java:348) | |
at org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:2625) | |
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2590) | |
at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:132) | |
at org.apache.hadoop.io.compress.CompressionCodecFactory.<init>(CompressionCodecFactory.java:180) | |
at org.apache.hadoop.mapred.TextInputFormat.configure(TextInputFormat.java:45) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) | |
at java.lang.reflect.Method.invoke(Method.java:498) | |
at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:112) | |
at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:81) | |
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:139) | |
at org.apache.spark.rdd.HadoopRDD.getInputFormat(HadoopRDD.scala:194) | |
at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:208) | |
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:292) | |
at scala.Option.getOrElse(Option.scala:189) | |
at org.apache.spark.rdd.RDD.partitions(RDD.scala:288) | |
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) | |
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:292) | |
at scala.Option.getOrElse(Option.scala:189) | |
at org.apache.spark.rdd.RDD.partitions(RDD.scala:288) | |
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) | |
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:292) | |
at scala.Option.getOrElse(Option.scala:189) | |
at org.apache.spark.rdd.RDD.partitions(RDD.scala:288) | |
at org.apache.spark.rdd.RDD.$anonfun$isEmpty$1(RDD.scala:1578) | |
at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) | |
at org.apache.spark.rdd.RDD.withScope(RDD.scala:406) | |
at org.apache.spark.rdd.RDD.isEmpty(RDD.scala:1578) | |
at org.apache.spark.api.java.JavaRDDLike.isEmpty(JavaRDDLike.scala:558) | |
at org.apache.spark.api.java.JavaRDDLike.isEmpty$(JavaRDDLike.scala:558) | |
at org.apache.spark.api.java.AbstractJavaRDDLike.isEmpty(JavaRDDLike.scala:45) | |
at org.apache.hudi.utilities.deltastreamer.DeltaSync.fetchFromSource(DeltaSync.java:545) | |
at org.apache.hudi.utilities.deltastreamer.DeltaSync.readFromSource(DeltaSync.java:460) | |
at org.apache.hudi.utilities.deltastreamer.DeltaSync.syncOnce(DeltaSync.java:364) | |
at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.lambda$sync$2(HoodieDeltaStreamer.java:215) | |
at org.apache.hudi.common.util.Option.ifPresent(Option.java:97) | |
at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.sync(HoodieDeltaStreamer.java:213) | |
at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.main(HoodieDeltaStreamer.java:592) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) | |
at java.lang.reflect.Method.invoke(Method.java:498) | |
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) | |
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:958) | |
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180) | |
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203) | |
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90) | |
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1046) | |
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1055) | |
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) | |
23/05/17 16:14:52 ERROR LzoCodec: Cannot load native-lzo without native-hadoop | |
23/05/17 16:14:52 INFO FileInputFormat: Total input files to process : 2 | |
23/05/17 16:14:52 INFO SparkContext: Starting job: isEmpty at DeltaSync.java:545 | |
23/05/17 16:14:52 INFO DAGScheduler: Got job 0 (isEmpty at DeltaSync.java:545) with 1 output partitions | |
23/05/17 16:14:52 INFO DAGScheduler: Final stage: ResultStage 0 (isEmpty at DeltaSync.java:545) | |
23/05/17 16:14:52 INFO DAGScheduler: Parents of final stage: List() | |
23/05/17 16:14:52 INFO DAGScheduler: Missing parents: List() | |
23/05/17 16:14:52 INFO DAGScheduler: Submitting ResultStage 0 (MapPartitionsRDD[2] at map at SourceFormatAdapter.java:67), which has no missing parents | |
23/05/17 16:14:52 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 6.5 KiB, free 365.8 MiB) | |
23/05/17 16:14:52 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 3.6 KiB, free 365.7 MiB) | |
23/05/17 16:14:52 INFO BlockManagerInfo: Added broadcast_1_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 3.6 KiB, free: 366.2 MiB) | |
23/05/17 16:14:52 INFO SparkContext: Created broadcast 1 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:14:53 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 0 (MapPartitionsRDD[2] at map at SourceFormatAdapter.java:67) (first 15 tasks are for partitions Vector(0)) | |
23/05/17 16:14:53 INFO TaskSchedulerImpl: Adding task set 0.0 with 1 tasks resource profile 0 | |
23/05/17 16:14:53 INFO TaskSetManager: Starting task 0.0 in stage 0.0 (TID 0) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, ANY, 4555 bytes) taskResourceAssignments Map() | |
23/05/17 16:14:53 INFO Executor: Running task 0.0 in stage 0.0 (TID 0) | |
23/05/17 16:14:53 INFO HadoopRDD: Input split: hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/source_parquet/batch_1.json:0+759994 | |
23/05/17 16:14:53 INFO Executor: Finished task 0.0 in stage 0.0 (TID 0). 1281 bytes result sent to driver | |
23/05/17 16:14:53 INFO TaskSetManager: Finished task 0.0 in stage 0.0 (TID 0) in 377 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
23/05/17 16:14:53 INFO TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool | |
23/05/17 16:14:53 INFO DAGScheduler: ResultStage 0 (isEmpty at DeltaSync.java:545) finished in 0.483 s | |
23/05/17 16:14:53 INFO DAGScheduler: Job 0 is finished. Cancelling potential speculative or zombie tasks for this job | |
23/05/17 16:14:53 INFO TaskSchedulerImpl: Killing all running tasks in stage 0: Stage finished | |
23/05/17 16:14:53 INFO DAGScheduler: Job 0 finished: isEmpty at DeltaSync.java:545, took 0.554018 s | |
23/05/17 16:14:53 INFO DeltaSync: Setting up new Hoodie Write Client | |
23/05/17 16:14:53 INFO EmbeddedTimelineService: Starting Timeline service !! | |
23/05/17 16:14:53 INFO EmbeddedTimelineService: Overriding hostIp to (ip-172-31-19-77.us-east-2.compute.internal) found in spark-conf. It was null | |
23/05/17 16:14:53 INFO FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
23/05/17 16:14:53 INFO FileSystemViewManager: Creating in-memory based Table View | |
23/05/17 16:14:53 INFO log: Logging initialized @6630ms to org.apache.hudi.org.eclipse.jetty.util.log.Slf4jLog | |
23/05/17 16:14:53 INFO Javalin: | |
__ __ _ __ __ | |
/ /____ _ _ __ ____ _ / /(_)____ / // / | |
__ / // __ `/| | / // __ `// // // __ \ / // /_ | |
/ /_/ // /_/ / | |/ // /_/ // // // / / / /__ __/ | |
\____/ \__,_/ |___/ \__,_//_//_//_/ /_/ /_/ | |
https://javalin.io/documentation | |
23/05/17 16:14:53 INFO Javalin: Starting Javalin ... | |
23/05/17 16:14:53 INFO Javalin: You are running Javalin 4.6.7 (released October 24, 2022. Your Javalin version is 205 days old. Consider checking for a newer version.). | |
23/05/17 16:14:53 INFO Server: jetty-9.4.48.v20220622; built: 2022-06-21T20:42:25.880Z; git: 6b67c5719d1f4371b33655ff2d047d24e171e49a; jvm 1.8.0_372-b07 | |
23/05/17 16:14:53 INFO BlockManagerInfo: Removed broadcast_1_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 3.6 KiB, free: 366.2 MiB) | |
23/05/17 16:14:54 INFO Server: Started @7229ms | |
23/05/17 16:14:54 INFO Javalin: Listening on http://localhost:36617/ | |
23/05/17 16:14:54 INFO Javalin: Javalin started in 273ms \o/ | |
23/05/17 16:14:54 INFO TimelineService: Starting Timeline server on port :36617 | |
23/05/17 16:14:54 INFO EmbeddedTimelineService: Started embedded timeline server at ip-172-31-19-77.us-east-2.compute.internal:36617 | |
23/05/17 16:14:54 INFO BaseHoodieClient: Timeline Server already running. Not restarting the service | |
23/05/17 16:14:54 INFO BaseHoodieClient: Timeline Server already running. Not restarting the service | |
23/05/17 16:14:54 INFO SparkContext: Starting job: isEmpty at DeltaSync.java:665 | |
23/05/17 16:14:54 INFO DAGScheduler: Got job 1 (isEmpty at DeltaSync.java:665) with 1 output partitions | |
23/05/17 16:14:54 INFO DAGScheduler: Final stage: ResultStage 1 (isEmpty at DeltaSync.java:665) | |
23/05/17 16:14:54 INFO DAGScheduler: Parents of final stage: List() | |
23/05/17 16:14:54 INFO DAGScheduler: Missing parents: List() | |
23/05/17 16:14:54 INFO DAGScheduler: Submitting ResultStage 1 (MapPartitionsRDD[3] at map at DeltaSync.java:558), which has no missing parents | |
23/05/17 16:14:54 INFO MemoryStore: Block broadcast_2 stored as values in memory (estimated size 13.0 KiB, free 365.7 MiB) | |
23/05/17 16:14:54 INFO MemoryStore: Block broadcast_2_piece0 stored as bytes in memory (estimated size 6.8 KiB, free 365.7 MiB) | |
23/05/17 16:14:54 INFO BlockManagerInfo: Added broadcast_2_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 6.8 KiB, free: 366.2 MiB) | |
23/05/17 16:14:54 INFO SparkContext: Created broadcast 2 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:14:54 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 1 (MapPartitionsRDD[3] at map at DeltaSync.java:558) (first 15 tasks are for partitions Vector(0)) | |
23/05/17 16:14:54 INFO TaskSchedulerImpl: Adding task set 1.0 with 1 tasks resource profile 0 | |
23/05/17 16:14:54 INFO TaskSetManager: Starting task 0.0 in stage 1.0 (TID 1) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, ANY, 4555 bytes) taskResourceAssignments Map() | |
23/05/17 16:14:54 INFO Executor: Running task 0.0 in stage 1.0 (TID 1) | |
23/05/17 16:14:54 INFO HadoopRDD: Input split: hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/source_parquet/batch_1.json:0+759994 | |
23/05/17 16:14:54 INFO Executor: Finished task 0.0 in stage 1.0 (TID 1). 1080 bytes result sent to driver | |
23/05/17 16:14:54 INFO TaskSetManager: Finished task 0.0 in stage 1.0 (TID 1) in 75 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
23/05/17 16:14:54 INFO TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool | |
23/05/17 16:14:54 INFO DAGScheduler: ResultStage 1 (isEmpty at DeltaSync.java:665) finished in 0.086 s | |
23/05/17 16:14:54 INFO DAGScheduler: Job 1 is finished. Cancelling potential speculative or zombie tasks for this job | |
23/05/17 16:14:54 INFO TaskSchedulerImpl: Killing all running tasks in stage 1: Stage finished | |
23/05/17 16:14:54 INFO DAGScheduler: Job 1 finished: isEmpty at DeltaSync.java:665, took 0.091919 s | |
23/05/17 16:14:54 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:14:54 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:14:54 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:14:54 INFO HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:14:54 INFO HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
23/05/17 16:14:54 INFO CleanerUtils: Cleaned failed attempts if any | |
23/05/17 16:14:54 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:14:54 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:14:54 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:14:54 INFO HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:14:54 INFO HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
23/05/17 16:14:54 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:14:54 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:14:54 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:14:54 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:54 WARN HoodieBackedTableMetadata: Metadata table was not found at path /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:54 INFO FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
23/05/17 16:14:54 INFO FileSystemViewManager: Creating remote first table view | |
23/05/17 16:14:54 INFO BaseHoodieWriteClient: Generate a new instant time: 20230517161454289 action: commit | |
23/05/17 16:14:54 INFO HoodieActiveTimeline: Creating a new instant [==>20230517161454289__commit__REQUESTED] | |
23/05/17 16:14:54 INFO DeltaSync: Starting commit : 20230517161454289 | |
23/05/17 16:14:54 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:14:54 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:14:54 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:14:54 INFO HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:14:54 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517161454289__commit__REQUESTED]} | |
23/05/17 16:14:54 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:14:54 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:14:54 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:14:54 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517161454289__commit__REQUESTED]} | |
23/05/17 16:14:54 INFO HoodieBackedTableMetadataWriter: Creating a new metadata table in /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata at instant 00000000000000 | |
23/05/17 16:14:54 INFO HoodieTableMetaClient: Initializing /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata as hoodie table /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:54 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:54 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:14:54 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:54 INFO HoodieTableMetaClient: Finished initializing Table of type MERGE_ON_READ from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:54 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:14:54 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:14:54 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:14:54 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:54 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:14:54 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:54 INFO HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
23/05/17 16:14:54 INFO AbstractTableFileSystemView: Took 3 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:14:54 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:14:54 INFO HoodieBackedTableMetadataWriter: Creating 1 file groups for partition files with base fileId files- at instant time 00000000000000 | |
23/05/17 16:14:54 INFO HoodieLogFormat$WriterBuilder: Building HoodieLogFormat Writer | |
23/05/17 16:14:54 INFO HoodieLogFormat$WriterBuilder: HoodieLogFile on path /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0 | |
23/05/17 16:14:54 INFO HoodieLogFormatWriter: HoodieLogFile{pathStr='/tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0', fileLen=0} does not exist. Create a new file | |
23/05/17 16:14:55 INFO HoodieBackedTableMetadataWriter: Initializing metadata table by using file listings in /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:14:55 INFO HoodieBackedTableMetadataWriter: Triggering empty Commit to metadata to initialize | |
23/05/17 16:14:55 INFO AbstractTableFileSystemView: Took 1 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:14:55 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:14:55 INFO BaseHoodieClient: Embedded Timeline Server is disabled. Not starting timeline service | |
23/05/17 16:14:55 INFO BaseHoodieClient: Embedded Timeline Server is disabled. Not starting timeline service | |
23/05/17 16:14:55 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:55 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:14:55 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:55 INFO HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:55 INFO HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
23/05/17 16:14:55 INFO FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
23/05/17 16:14:55 INFO FileSystemViewManager: Creating in-memory based Table View | |
23/05/17 16:14:55 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:55 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:14:55 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:55 INFO HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:55 INFO HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
23/05/17 16:14:55 INFO CleanerUtils: Cleaned failed attempts if any | |
23/05/17 16:14:55 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:55 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:14:55 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:55 INFO HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:55 INFO HoodieActiveTimeline: Loaded instants upto : Optional.empty | |
23/05/17 16:14:55 INFO FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
23/05/17 16:14:55 INFO FileSystemViewManager: Creating in-memory based Table View | |
23/05/17 16:14:55 INFO BaseHoodieWriteClient: Generate a new instant time: 00000000000000 action: deltacommit | |
23/05/17 16:14:55 INFO HoodieActiveTimeline: Creating a new instant [==>00000000000000__deltacommit__REQUESTED] | |
23/05/17 16:14:55 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:55 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:14:55 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:55 INFO HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:55 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[==>00000000000000__deltacommit__REQUESTED]} | |
23/05/17 16:14:55 INFO FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
23/05/17 16:14:55 INFO FileSystemViewManager: Creating in-memory based Table View | |
23/05/17 16:14:55 INFO AsyncCleanerService: The HoodieWriteClient is not configured to auto & async clean. Async clean service will not start. | |
23/05/17 16:14:55 INFO AsyncArchiveService: The HoodieWriteClient is not configured to auto & async archive. Async archive service will not start. | |
23/05/17 16:14:55 INFO AbstractTableFileSystemView: Took 1 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:14:55 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:14:55 INFO SparkContext: Starting job: countByKey at HoodieJavaPairRDD.java:105 | |
23/05/17 16:14:55 INFO DAGScheduler: Job 2 finished: countByKey at HoodieJavaPairRDD.java:105, took 0.016591 s | |
23/05/17 16:14:55 INFO BaseSparkCommitActionExecutor: Input workload profile :WorkloadProfile {globalStat=WorkloadStat {numInserts=0, numUpdates=0}, InputPartitionStat={}, OutputPartitionStat={}, operationType=UPSERT_PREPPED} | |
23/05/17 16:14:55 INFO UpsertPartitioner: AvgRecordSize => 1024 | |
23/05/17 16:14:55 INFO AbstractTableFileSystemView: Took 1 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:14:55 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:14:55 INFO UpsertPartitioner: Total Buckets :0, buckets info => {}, | |
Partition to insert buckets => {}, | |
UpdateLocations mapped to buckets =>{} | |
23/05/17 16:14:55 INFO HoodieActiveTimeline: Checking for file exists ?/tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/00000000000000.deltacommit.requested | |
23/05/17 16:14:55 INFO FileIOUtils: Created a new file in meta path: /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/00000000000000.deltacommit.inflight | |
23/05/17 16:14:55 INFO HoodieActiveTimeline: Create new file for toInstant ?/tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/00000000000000.deltacommit.inflight | |
23/05/17 16:14:55 INFO BaseSparkCommitActionExecutor: no validators configured. | |
23/05/17 16:14:55 INFO BaseCommitActionExecutor: Auto commit enabled: Committing 00000000000000 | |
23/05/17 16:14:55 INFO SparkContext: Starting job: collect at HoodieJavaRDD.java:163 | |
23/05/17 16:14:55 INFO DAGScheduler: Job 3 finished: collect at HoodieJavaRDD.java:163, took 0.001393 s | |
23/05/17 16:14:55 INFO CommitUtils: Creating metadata for UPSERT_PREPPED numWriteStats:0 numReplaceFileIds:0 | |
23/05/17 16:14:56 INFO SparkContext: Starting job: collect at HoodieJavaRDD.java:163 | |
23/05/17 16:14:56 INFO DAGScheduler: Job 4 finished: collect at HoodieJavaRDD.java:163, took 0.009241 s | |
23/05/17 16:14:56 INFO BaseSparkCommitActionExecutor: Committing 00000000000000, action Type deltacommit, operation Type UPSERT_PREPPED | |
23/05/17 16:14:56 INFO HoodieActiveTimeline: Marking instant complete [==>00000000000000__deltacommit__INFLIGHT] | |
23/05/17 16:14:56 INFO HoodieActiveTimeline: Checking for file exists ?/tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/00000000000000.deltacommit.inflight | |
23/05/17 16:14:56 INFO HoodieActiveTimeline: Create new file for toInstant ?/tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/00000000000000.deltacommit | |
23/05/17 16:14:56 INFO HoodieActiveTimeline: Completed [==>00000000000000__deltacommit__INFLIGHT] | |
23/05/17 16:14:56 INFO BaseSparkCommitActionExecutor: Committed 00000000000000 | |
23/05/17 16:14:56 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:56 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:14:56 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:56 INFO HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:56 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
23/05/17 16:14:56 INFO FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
23/05/17 16:14:56 INFO FileSystemViewManager: Creating in-memory based Table View | |
23/05/17 16:14:56 INFO BlockManagerInfo: Removed broadcast_2_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 6.8 KiB, free: 366.2 MiB) | |
23/05/17 16:14:56 INFO SparkContext: Starting job: collect at SparkHoodieBackedTableMetadataWriter.java:185 | |
23/05/17 16:14:56 INFO DAGScheduler: Job 5 finished: collect at SparkHoodieBackedTableMetadataWriter.java:185, took 0.022752 s | |
23/05/17 16:14:56 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
23/05/17 16:14:56 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:14:56 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:14:56 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:14:56 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:56 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:14:56 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:56 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
23/05/17 16:14:56 INFO AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:14:56 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:14:56 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:14:56 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:14:56 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:14:56 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:56 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:14:56 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:14:56 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
23/05/17 16:14:56 INFO AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:14:56 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:14:56 INFO FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
23/05/17 16:14:56 INFO FileSystemViewManager: Creating remote first table view | |
23/05/17 16:14:56 INFO AsyncCleanerService: The HoodieWriteClient is not configured to auto & async clean. Async clean service will not start. | |
23/05/17 16:14:56 INFO AsyncArchiveService: The HoodieWriteClient is not configured to auto & async archive. Async archive service will not start. | |
23/05/17 16:14:56 INFO SparkContext: Starting job: countByKey at HoodieJavaPairRDD.java:105 | |
23/05/17 16:14:56 INFO DAGScheduler: Registering RDD 15 (mapToPair at HoodieJavaRDD.java:135) as input to shuffle 3 | |
23/05/17 16:14:56 INFO DAGScheduler: Registering RDD 19 (countByKey at HoodieJavaPairRDD.java:105) as input to shuffle 2 | |
23/05/17 16:14:56 INFO DAGScheduler: Got job 6 (countByKey at HoodieJavaPairRDD.java:105) with 2 output partitions | |
23/05/17 16:14:56 INFO DAGScheduler: Final stage: ResultStage 4 (countByKey at HoodieJavaPairRDD.java:105) | |
23/05/17 16:14:56 INFO DAGScheduler: Parents of final stage: List(ShuffleMapStage 3) | |
23/05/17 16:14:56 INFO DAGScheduler: Missing parents: List(ShuffleMapStage 3) | |
23/05/17 16:14:56 INFO DAGScheduler: Submitting ShuffleMapStage 2 (MapPartitionsRDD[15] at mapToPair at HoodieJavaRDD.java:135), which has no missing parents | |
23/05/17 16:14:56 INFO MemoryStore: Block broadcast_3 stored as values in memory (estimated size 31.3 KiB, free 365.7 MiB) | |
23/05/17 16:14:56 INFO MemoryStore: Block broadcast_3_piece0 stored as bytes in memory (estimated size 14.9 KiB, free 365.7 MiB) | |
23/05/17 16:14:56 INFO BlockManagerInfo: Added broadcast_3_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 14.9 KiB, free: 366.2 MiB) | |
23/05/17 16:14:56 INFO SparkContext: Created broadcast 3 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:14:56 INFO DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 2 (MapPartitionsRDD[15] at mapToPair at HoodieJavaRDD.java:135) (first 15 tasks are for partitions Vector(0, 1)) | |
23/05/17 16:14:56 INFO TaskSchedulerImpl: Adding task set 2.0 with 2 tasks resource profile 0 | |
23/05/17 16:14:56 INFO TaskSetManager: Starting task 0.0 in stage 2.0 (TID 2) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, ANY, 4544 bytes) taskResourceAssignments Map() | |
23/05/17 16:14:56 INFO Executor: Running task 0.0 in stage 2.0 (TID 2) | |
23/05/17 16:14:56 INFO HadoopRDD: Input split: hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/source_parquet/batch_1.json:0+759994 | |
23/05/17 16:14:57 INFO Executor: Finished task 0.0 in stage 2.0 (TID 2). 1210 bytes result sent to driver | |
23/05/17 16:14:57 INFO TaskSetManager: Starting task 1.0 in stage 2.0 (TID 3) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, ANY, 4544 bytes) taskResourceAssignments Map() | |
23/05/17 16:14:57 INFO Executor: Running task 1.0 in stage 2.0 (TID 3) | |
23/05/17 16:14:57 INFO HadoopRDD: Input split: hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/source_parquet/batch_2.json:0+363815 | |
23/05/17 16:14:57 INFO TaskSetManager: Finished task 0.0 in stage 2.0 (TID 2) in 628 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
23/05/17 16:14:57 INFO Executor: Finished task 1.0 in stage 2.0 (TID 3). 1210 bytes result sent to driver | |
23/05/17 16:14:57 INFO TaskSetManager: Finished task 1.0 in stage 2.0 (TID 3) in 125 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
23/05/17 16:14:57 INFO TaskSchedulerImpl: Removed TaskSet 2.0, whose tasks have all completed, from pool | |
23/05/17 16:14:57 INFO DAGScheduler: ShuffleMapStage 2 (mapToPair at HoodieJavaRDD.java:135) finished in 0.752 s | |
23/05/17 16:14:57 INFO DAGScheduler: looking for newly runnable stages | |
23/05/17 16:14:57 INFO DAGScheduler: running: Set() | |
23/05/17 16:14:57 INFO DAGScheduler: waiting: Set(ShuffleMapStage 3, ResultStage 4) | |
23/05/17 16:14:57 INFO DAGScheduler: failed: Set() | |
23/05/17 16:14:57 INFO DAGScheduler: Submitting ShuffleMapStage 3 (MapPartitionsRDD[19] at countByKey at HoodieJavaPairRDD.java:105), which has no missing parents | |
23/05/17 16:14:57 INFO MemoryStore: Block broadcast_4 stored as values in memory (estimated size 25.5 KiB, free 365.7 MiB) | |
23/05/17 16:14:57 INFO MemoryStore: Block broadcast_4_piece0 stored as bytes in memory (estimated size 12.2 KiB, free 365.7 MiB) | |
23/05/17 16:14:57 INFO BlockManagerInfo: Added broadcast_4_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 12.2 KiB, free: 366.2 MiB) | |
23/05/17 16:14:57 INFO SparkContext: Created broadcast 4 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:14:57 INFO DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 3 (MapPartitionsRDD[19] at countByKey at HoodieJavaPairRDD.java:105) (first 15 tasks are for partitions Vector(0, 1)) | |
23/05/17 16:14:57 INFO TaskSchedulerImpl: Adding task set 3.0 with 2 tasks resource profile 0 | |
23/05/17 16:14:57 INFO TaskSetManager: Starting task 0.0 in stage 3.0 (TID 4) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, NODE_LOCAL, 4260 bytes) taskResourceAssignments Map() | |
23/05/17 16:14:57 INFO Executor: Running task 0.0 in stage 3.0 (TID 4) | |
23/05/17 16:14:57 INFO ShuffleBlockFetcherIterator: Getting 2 (7.1 KiB) non-empty blocks including 2 (7.1 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
23/05/17 16:14:57 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 15 ms | |
23/05/17 16:14:57 INFO MemoryStore: Block rdd_17_0 stored as values in memory (estimated size 8.2 KiB, free 365.7 MiB) | |
23/05/17 16:14:57 INFO BlockManagerInfo: Added rdd_17_0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 8.2 KiB, free: 366.2 MiB) | |
23/05/17 16:14:57 INFO Executor: Finished task 0.0 in stage 3.0 (TID 4). 1425 bytes result sent to driver | |
23/05/17 16:14:57 INFO TaskSetManager: Starting task 1.0 in stage 3.0 (TID 5) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, NODE_LOCAL, 4260 bytes) taskResourceAssignments Map() | |
23/05/17 16:14:57 INFO TaskSetManager: Finished task 0.0 in stage 3.0 (TID 4) in 189 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
23/05/17 16:14:57 INFO Executor: Running task 1.0 in stage 3.0 (TID 5) | |
23/05/17 16:14:57 INFO ShuffleBlockFetcherIterator: Getting 2 (4.8 KiB) non-empty blocks including 2 (4.8 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
23/05/17 16:14:57 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 1 ms | |
23/05/17 16:14:57 INFO MemoryStore: Block rdd_17_1 stored as values in memory (estimated size 5.8 KiB, free 365.7 MiB) | |
23/05/17 16:14:57 INFO BlockManagerInfo: Added rdd_17_1 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 5.8 KiB, free: 366.2 MiB) | |
23/05/17 16:14:57 INFO Executor: Finished task 1.0 in stage 3.0 (TID 5). 1425 bytes result sent to driver | |
23/05/17 16:14:57 INFO TaskSetManager: Finished task 1.0 in stage 3.0 (TID 5) in 49 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
23/05/17 16:14:57 INFO TaskSchedulerImpl: Removed TaskSet 3.0, whose tasks have all completed, from pool | |
23/05/17 16:14:57 INFO DAGScheduler: ShuffleMapStage 3 (countByKey at HoodieJavaPairRDD.java:105) finished in 0.276 s | |
23/05/17 16:14:57 INFO DAGScheduler: looking for newly runnable stages | |
23/05/17 16:14:57 INFO DAGScheduler: running: Set() | |
23/05/17 16:14:57 INFO DAGScheduler: waiting: Set(ResultStage 4) | |
23/05/17 16:14:57 INFO DAGScheduler: failed: Set() | |
23/05/17 16:14:57 INFO DAGScheduler: Submitting ResultStage 4 (ShuffledRDD[20] at countByKey at HoodieJavaPairRDD.java:105), which has no missing parents | |
23/05/17 16:14:57 INFO MemoryStore: Block broadcast_5 stored as values in memory (estimated size 5.5 KiB, free 365.7 MiB) | |
23/05/17 16:14:57 INFO MemoryStore: Block broadcast_5_piece0 stored as bytes in memory (estimated size 3.2 KiB, free 365.7 MiB) | |
23/05/17 16:14:57 INFO BlockManagerInfo: Added broadcast_5_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 3.2 KiB, free: 366.2 MiB) | |
23/05/17 16:14:57 INFO SparkContext: Created broadcast 5 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:14:57 INFO DAGScheduler: Submitting 2 missing tasks from ResultStage 4 (ShuffledRDD[20] at countByKey at HoodieJavaPairRDD.java:105) (first 15 tasks are for partitions Vector(0, 1)) | |
23/05/17 16:14:57 INFO TaskSchedulerImpl: Adding task set 4.0 with 2 tasks resource profile 0 | |
23/05/17 16:14:57 INFO TaskSetManager: Starting task 1.0 in stage 4.0 (TID 6) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, NODE_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
23/05/17 16:14:57 INFO Executor: Running task 1.0 in stage 4.0 (TID 6) | |
23/05/17 16:14:57 INFO ShuffleBlockFetcherIterator: Getting 2 (120.0 B) non-empty blocks including 2 (120.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
23/05/17 16:14:57 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
23/05/17 16:14:57 INFO Executor: Finished task 1.0 in stage 4.0 (TID 6). 1252 bytes result sent to driver | |
23/05/17 16:14:57 INFO TaskSetManager: Starting task 0.0 in stage 4.0 (TID 7) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
23/05/17 16:14:57 INFO TaskSetManager: Finished task 1.0 in stage 4.0 (TID 6) in 14 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
23/05/17 16:14:57 INFO Executor: Running task 0.0 in stage 4.0 (TID 7) | |
23/05/17 16:14:57 INFO ShuffleBlockFetcherIterator: Getting 0 (0.0 B) non-empty blocks including 0 (0.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
23/05/17 16:14:57 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
23/05/17 16:14:57 INFO Executor: Finished task 0.0 in stage 4.0 (TID 7). 1235 bytes result sent to driver | |
23/05/17 16:14:57 INFO TaskSetManager: Finished task 0.0 in stage 4.0 (TID 7) in 28 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
23/05/17 16:14:57 INFO TaskSchedulerImpl: Removed TaskSet 4.0, whose tasks have all completed, from pool | |
23/05/17 16:14:57 INFO DAGScheduler: ResultStage 4 (countByKey at HoodieJavaPairRDD.java:105) finished in 0.061 s | |
23/05/17 16:14:57 INFO DAGScheduler: Job 6 is finished. Cancelling potential speculative or zombie tasks for this job | |
23/05/17 16:14:57 INFO TaskSchedulerImpl: Killing all running tasks in stage 4: Stage finished | |
23/05/17 16:14:57 INFO DAGScheduler: Job 6 finished: countByKey at HoodieJavaPairRDD.java:105, took 1.137979 s | |
23/05/17 16:14:58 INFO BlockManager: Removing RDD 4 | |
23/05/17 16:14:58 INFO BlockManagerInfo: Removed broadcast_5_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 3.2 KiB, free: 366.2 MiB) | |
23/05/17 16:14:58 INFO BlockManagerInfo: Removed broadcast_4_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 12.2 KiB, free: 366.2 MiB) | |
23/05/17 16:14:58 INFO BlockManagerInfo: Removed broadcast_3_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 14.9 KiB, free: 366.2 MiB) | |
23/05/17 16:14:58 INFO BlockManager: Removing RDD 12 | |
23/05/17 16:14:58 INFO SparkContext: Starting job: collect at HoodieSparkEngineContext.java:137 | |
23/05/17 16:14:58 INFO DAGScheduler: Got job 7 (collect at HoodieSparkEngineContext.java:137) with 1 output partitions | |
23/05/17 16:14:58 INFO DAGScheduler: Final stage: ResultStage 5 (collect at HoodieSparkEngineContext.java:137) | |
23/05/17 16:14:58 INFO DAGScheduler: Parents of final stage: List() | |
23/05/17 16:14:58 INFO DAGScheduler: Missing parents: List() | |
23/05/17 16:14:58 INFO DAGScheduler: Submitting ResultStage 5 (MapPartitionsRDD[22] at flatMap at HoodieSparkEngineContext.java:137), which has no missing parents | |
23/05/17 16:14:58 INFO MemoryStore: Block broadcast_6 stored as values in memory (estimated size 642.5 KiB, free 365.1 MiB) | |
23/05/17 16:14:58 INFO MemoryStore: Block broadcast_6_piece0 stored as bytes in memory (estimated size 240.4 KiB, free 364.9 MiB) | |
23/05/17 16:14:58 INFO BlockManagerInfo: Added broadcast_6_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 240.4 KiB, free: 366.0 MiB) | |
23/05/17 16:14:58 INFO SparkContext: Created broadcast 6 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:14:58 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 5 (MapPartitionsRDD[22] at flatMap at HoodieSparkEngineContext.java:137) (first 15 tasks are for partitions Vector(0)) | |
23/05/17 16:14:58 INFO TaskSchedulerImpl: Adding task set 5.0 with 1 tasks resource profile 0 | |
23/05/17 16:14:58 INFO TaskSetManager: Starting task 0.0 in stage 5.0 (TID 8) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4344 bytes) taskResourceAssignments Map() | |
23/05/17 16:14:58 INFO Executor: Running task 0.0 in stage 5.0 (TID 8) | |
23/05/17 16:14:58 INFO Executor: Finished task 0.0 in stage 5.0 (TID 8). 805 bytes result sent to driver | |
23/05/17 16:14:58 INFO TaskSetManager: Finished task 0.0 in stage 5.0 (TID 8) in 79 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
23/05/17 16:14:58 INFO TaskSchedulerImpl: Removed TaskSet 5.0, whose tasks have all completed, from pool | |
23/05/17 16:14:58 INFO DAGScheduler: ResultStage 5 (collect at HoodieSparkEngineContext.java:137) finished in 0.143 s | |
23/05/17 16:14:58 INFO DAGScheduler: Job 7 is finished. Cancelling potential speculative or zombie tasks for this job | |
23/05/17 16:14:58 INFO TaskSchedulerImpl: Killing all running tasks in stage 5: Stage finished | |
23/05/17 16:14:58 INFO DAGScheduler: Job 7 finished: collect at HoodieSparkEngineContext.java:137, took 0.147933 s | |
23/05/17 16:14:58 INFO SparkContext: Starting job: collect at HoodieSparkEngineContext.java:103 | |
23/05/17 16:14:58 INFO DAGScheduler: Got job 8 (collect at HoodieSparkEngineContext.java:103) with 1 output partitions | |
23/05/17 16:14:58 INFO DAGScheduler: Final stage: ResultStage 6 (collect at HoodieSparkEngineContext.java:103) | |
23/05/17 16:14:58 INFO DAGScheduler: Parents of final stage: List() | |
23/05/17 16:14:58 INFO DAGScheduler: Missing parents: List() | |
23/05/17 16:14:58 INFO DAGScheduler: Submitting ResultStage 6 (MapPartitionsRDD[24] at map at HoodieSparkEngineContext.java:103), which has no missing parents | |
23/05/17 16:14:58 INFO MemoryStore: Block broadcast_7 stored as values in memory (estimated size 642.3 KiB, free 364.3 MiB) | |
23/05/17 16:14:58 INFO MemoryStore: Block broadcast_7_piece0 stored as bytes in memory (estimated size 240.4 KiB, free 364.0 MiB) | |
23/05/17 16:14:58 INFO BlockManagerInfo: Added broadcast_7_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 240.4 KiB, free: 365.8 MiB) | |
23/05/17 16:14:58 INFO SparkContext: Created broadcast 7 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:14:58 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 6 (MapPartitionsRDD[24] at map at HoodieSparkEngineContext.java:103) (first 15 tasks are for partitions Vector(0)) | |
23/05/17 16:14:58 INFO TaskSchedulerImpl: Adding task set 6.0 with 1 tasks resource profile 0 | |
23/05/17 16:14:58 INFO TaskSetManager: Starting task 0.0 in stage 6.0 (TID 9) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4332 bytes) taskResourceAssignments Map() | |
23/05/17 16:14:58 INFO Executor: Running task 0.0 in stage 6.0 (TID 9) | |
23/05/17 16:14:58 INFO Executor: Finished task 0.0 in stage 6.0 (TID 9). 805 bytes result sent to driver | |
23/05/17 16:14:58 INFO TaskSetManager: Finished task 0.0 in stage 6.0 (TID 9) in 54 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
23/05/17 16:14:58 INFO TaskSchedulerImpl: Removed TaskSet 6.0, whose tasks have all completed, from pool | |
23/05/17 16:14:58 INFO DAGScheduler: ResultStage 6 (collect at HoodieSparkEngineContext.java:103) finished in 0.127 s | |
23/05/17 16:14:58 INFO DAGScheduler: Job 8 is finished. Cancelling potential speculative or zombie tasks for this job | |
23/05/17 16:14:58 INFO TaskSchedulerImpl: Killing all running tasks in stage 6: Stage finished | |
23/05/17 16:14:58 INFO DAGScheduler: Job 8 finished: collect at HoodieSparkEngineContext.java:103, took 0.133691 s | |
23/05/17 16:14:58 INFO SparkHoodieBloomIndexHelper: Input parallelism: 2, Index parallelism: 2 | |
23/05/17 16:14:58 INFO SparkContext: Starting job: countByKey at SparkHoodieBloomIndexHelper.java:195 | |
23/05/17 16:14:58 INFO DAGScheduler: Registering RDD 27 (countByKey at SparkHoodieBloomIndexHelper.java:195) as input to shuffle 4 | |
23/05/17 16:14:58 INFO DAGScheduler: Got job 9 (countByKey at SparkHoodieBloomIndexHelper.java:195) with 2 output partitions | |
23/05/17 16:14:58 INFO DAGScheduler: Final stage: ResultStage 9 (countByKey at SparkHoodieBloomIndexHelper.java:195) | |
23/05/17 16:14:58 INFO DAGScheduler: Parents of final stage: List(ShuffleMapStage 8) | |
23/05/17 16:14:58 INFO DAGScheduler: Missing parents: List(ShuffleMapStage 8) | |
23/05/17 16:14:58 INFO DAGScheduler: Submitting ShuffleMapStage 8 (MapPartitionsRDD[27] at countByKey at SparkHoodieBloomIndexHelper.java:195), which has no missing parents | |
23/05/17 16:14:58 INFO MemoryStore: Block broadcast_8 stored as values in memory (estimated size 27.1 KiB, free 364.0 MiB) | |
23/05/17 16:14:58 INFO MemoryStore: Block broadcast_8_piece0 stored as bytes in memory (estimated size 12.8 KiB, free 364.0 MiB) | |
23/05/17 16:14:58 INFO BlockManagerInfo: Added broadcast_8_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 12.8 KiB, free: 365.8 MiB) | |
23/05/17 16:14:58 INFO SparkContext: Created broadcast 8 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:14:58 INFO DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 8 (MapPartitionsRDD[27] at countByKey at SparkHoodieBloomIndexHelper.java:195) (first 15 tasks are for partitions Vector(0, 1)) | |
23/05/17 16:14:58 INFO TaskSchedulerImpl: Adding task set 8.0 with 2 tasks resource profile 0 | |
23/05/17 16:14:58 INFO TaskSetManager: Starting task 0.0 in stage 8.0 (TID 10) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4260 bytes) taskResourceAssignments Map() | |
23/05/17 16:14:58 INFO Executor: Running task 0.0 in stage 8.0 (TID 10) | |
23/05/17 16:14:58 INFO BlockManager: Found block rdd_17_0 locally | |
23/05/17 16:14:58 INFO Executor: Finished task 0.0 in stage 8.0 (TID 10). 1124 bytes result sent to driver | |
23/05/17 16:14:58 INFO TaskSetManager: Starting task 1.0 in stage 8.0 (TID 11) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, PROCESS_LOCAL, 4260 bytes) taskResourceAssignments Map() | |
23/05/17 16:14:58 INFO Executor: Running task 1.0 in stage 8.0 (TID 11) | |
23/05/17 16:14:58 INFO TaskSetManager: Finished task 0.0 in stage 8.0 (TID 10) in 27 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
23/05/17 16:14:58 INFO BlockManager: Found block rdd_17_1 locally | |
23/05/17 16:14:58 INFO Executor: Finished task 1.0 in stage 8.0 (TID 11). 1124 bytes result sent to driver | |
23/05/17 16:14:58 INFO TaskSetManager: Finished task 1.0 in stage 8.0 (TID 11) in 20 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
23/05/17 16:14:58 INFO TaskSchedulerImpl: Removed TaskSet 8.0, whose tasks have all completed, from pool | |
23/05/17 16:14:58 INFO DAGScheduler: ShuffleMapStage 8 (countByKey at SparkHoodieBloomIndexHelper.java:195) finished in 0.063 s | |
23/05/17 16:14:58 INFO DAGScheduler: looking for newly runnable stages | |
23/05/17 16:14:58 INFO DAGScheduler: running: Set() | |
23/05/17 16:14:58 INFO DAGScheduler: waiting: Set(ResultStage 9) | |
23/05/17 16:14:58 INFO DAGScheduler: failed: Set() | |
23/05/17 16:14:58 INFO DAGScheduler: Submitting ResultStage 9 (ShuffledRDD[28] at countByKey at SparkHoodieBloomIndexHelper.java:195), which has no missing parents | |
23/05/17 16:14:58 INFO MemoryStore: Block broadcast_9 stored as values in memory (estimated size 5.5 KiB, free 364.0 MiB) | |
23/05/17 16:14:58 INFO MemoryStore: Block broadcast_9_piece0 stored as bytes in memory (estimated size 3.2 KiB, free 364.0 MiB) | |
23/05/17 16:14:58 INFO BlockManagerInfo: Added broadcast_9_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 3.2 KiB, free: 365.7 MiB) | |
23/05/17 16:14:58 INFO SparkContext: Created broadcast 9 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:14:58 INFO DAGScheduler: Submitting 2 missing tasks from ResultStage 9 (ShuffledRDD[28] at countByKey at SparkHoodieBloomIndexHelper.java:195) (first 15 tasks are for partitions Vector(0, 1)) | |
23/05/17 16:14:58 INFO TaskSchedulerImpl: Adding task set 9.0 with 2 tasks resource profile 0 | |
23/05/17 16:14:58 INFO TaskSetManager: Starting task 0.0 in stage 9.0 (TID 12) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
23/05/17 16:14:58 INFO Executor: Running task 0.0 in stage 9.0 (TID 12) | |
23/05/17 16:14:58 INFO ShuffleBlockFetcherIterator: Getting 0 (0.0 B) non-empty blocks including 0 (0.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
23/05/17 16:14:58 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
23/05/17 16:14:58 INFO Executor: Finished task 0.0 in stage 9.0 (TID 12). 1235 bytes result sent to driver | |
23/05/17 16:14:58 INFO TaskSetManager: Starting task 1.0 in stage 9.0 (TID 13) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
23/05/17 16:14:58 INFO TaskSetManager: Finished task 0.0 in stage 9.0 (TID 12) in 8 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
23/05/17 16:14:58 INFO Executor: Running task 1.0 in stage 9.0 (TID 13) | |
23/05/17 16:14:58 INFO ShuffleBlockFetcherIterator: Getting 0 (0.0 B) non-empty blocks including 0 (0.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
23/05/17 16:14:58 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
23/05/17 16:14:58 INFO Executor: Finished task 1.0 in stage 9.0 (TID 13). 1235 bytes result sent to driver | |
23/05/17 16:14:58 INFO TaskSetManager: Finished task 1.0 in stage 9.0 (TID 13) in 10 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
23/05/17 16:14:58 INFO TaskSchedulerImpl: Removed TaskSet 9.0, whose tasks have all completed, from pool | |
23/05/17 16:14:58 INFO DAGScheduler: ResultStage 9 (countByKey at SparkHoodieBloomIndexHelper.java:195) finished in 0.027 s | |
23/05/17 16:14:58 INFO DAGScheduler: Job 9 is finished. Cancelling potential speculative or zombie tasks for this job | |
23/05/17 16:14:58 INFO TaskSchedulerImpl: Killing all running tasks in stage 9: Stage finished | |
23/05/17 16:14:58 INFO DAGScheduler: Job 9 finished: countByKey at SparkHoodieBloomIndexHelper.java:195, took 0.107652 s | |
23/05/17 16:14:58 INFO BucketizedBloomCheckPartitioner: TotalBuckets 0, min_buckets/partition 1 | |
23/05/17 16:14:58 INFO MapPartitionsRDD: Removing RDD 17 from persistence list | |
23/05/17 16:14:58 INFO MapPartitionsRDD: Removing RDD 35 from persistence list | |
23/05/17 16:14:58 INFO AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:14:58 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:14:58 INFO BlockManager: Removing RDD 17 | |
23/05/17 16:14:58 INFO BlockManager: Removing RDD 35 | |
23/05/17 16:14:58 INFO SparkContext: Starting job: countByKey at HoodieJavaPairRDD.java:105 | |
23/05/17 16:14:58 INFO DAGScheduler: Registering RDD 29 (mapToPair at SparkHoodieBloomIndexHelper.java:164) as input to shuffle 8 | |
23/05/17 16:14:58 INFO DAGScheduler: Registering RDD 35 (flatMapToPair at SparkHoodieBloomIndexHelper.java:175) as input to shuffle 6 | |
23/05/17 16:14:58 INFO DAGScheduler: Registering RDD 36 (mapToPair at HoodieJavaRDD.java:135) as input to shuffle 5 | |
23/05/17 16:14:58 INFO DAGScheduler: Registering RDD 45 (countByKey at HoodieJavaPairRDD.java:105) as input to shuffle 7 | |
23/05/17 16:14:58 INFO DAGScheduler: Got job 10 (countByKey at HoodieJavaPairRDD.java:105) with 2 output partitions | |
23/05/17 16:14:58 INFO DAGScheduler: Final stage: ResultStage 15 (countByKey at HoodieJavaPairRDD.java:105) | |
23/05/17 16:14:58 INFO DAGScheduler: Parents of final stage: List(ShuffleMapStage 14) | |
23/05/17 16:14:58 INFO DAGScheduler: Missing parents: List(ShuffleMapStage 14) | |
23/05/17 16:14:58 INFO DAGScheduler: Submitting ShuffleMapStage 13 (MapPartitionsRDD[36] at mapToPair at HoodieJavaRDD.java:135), which has no missing parents | |
23/05/17 16:14:58 INFO MemoryStore: Block broadcast_10 stored as values in memory (estimated size 25.2 KiB, free 364.0 MiB) | |
23/05/17 16:14:58 INFO MemoryStore: Block broadcast_10_piece0 stored as bytes in memory (estimated size 12.1 KiB, free 363.9 MiB) | |
23/05/17 16:14:58 INFO BlockManagerInfo: Added broadcast_10_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 12.1 KiB, free: 365.8 MiB) | |
23/05/17 16:14:58 INFO SparkContext: Created broadcast 10 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:14:58 INFO DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 13 (MapPartitionsRDD[36] at mapToPair at HoodieJavaRDD.java:135) (first 15 tasks are for partitions Vector(0, 1)) | |
23/05/17 16:14:58 INFO TaskSchedulerImpl: Adding task set 13.0 with 2 tasks resource profile 0 | |
23/05/17 16:14:58 INFO TaskSetManager: Starting task 0.0 in stage 13.0 (TID 14) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, NODE_LOCAL, 4260 bytes) taskResourceAssignments Map() | |
23/05/17 16:14:58 INFO Executor: Running task 0.0 in stage 13.0 (TID 14) | |
23/05/17 16:14:58 INFO ShuffleBlockFetcherIterator: Getting 2 (7.1 KiB) non-empty blocks including 2 (7.1 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
23/05/17 16:14:58 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
23/05/17 16:14:58 INFO Executor: Finished task 0.0 in stage 13.0 (TID 14). 1425 bytes result sent to driver | |
23/05/17 16:14:58 INFO TaskSetManager: Starting task 1.0 in stage 13.0 (TID 15) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, NODE_LOCAL, 4260 bytes) taskResourceAssignments Map() | |
23/05/17 16:14:58 INFO TaskSetManager: Finished task 0.0 in stage 13.0 (TID 14) in 39 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
23/05/17 16:14:58 INFO Executor: Running task 1.0 in stage 13.0 (TID 15) | |
23/05/17 16:14:58 INFO ShuffleBlockFetcherIterator: Getting 2 (4.8 KiB) non-empty blocks including 2 (4.8 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
23/05/17 16:14:58 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
23/05/17 16:14:58 INFO Executor: Finished task 1.0 in stage 13.0 (TID 15). 1425 bytes result sent to driver | |
23/05/17 16:14:58 INFO TaskSetManager: Finished task 1.0 in stage 13.0 (TID 15) in 20 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
23/05/17 16:14:58 INFO TaskSchedulerImpl: Removed TaskSet 13.0, whose tasks have all completed, from pool | |
23/05/17 16:14:58 INFO DAGScheduler: ShuffleMapStage 13 (mapToPair at HoodieJavaRDD.java:135) finished in 0.072 s | |
23/05/17 16:14:58 INFO DAGScheduler: looking for newly runnable stages | |
23/05/17 16:14:58 INFO DAGScheduler: running: Set() | |
23/05/17 16:14:58 INFO DAGScheduler: waiting: Set(ResultStage 15, ShuffleMapStage 14) | |
23/05/17 16:14:58 INFO DAGScheduler: failed: Set() | |
23/05/17 16:14:58 INFO DAGScheduler: Submitting ShuffleMapStage 14 (MapPartitionsRDD[45] at countByKey at HoodieJavaPairRDD.java:105), which has no missing parents | |
23/05/17 16:14:58 INFO MemoryStore: Block broadcast_11 stored as values in memory (estimated size 10.0 KiB, free 363.9 MiB) | |
23/05/17 16:14:58 INFO MemoryStore: Block broadcast_11_piece0 stored as bytes in memory (estimated size 5.2 KiB, free 363.9 MiB) | |
23/05/17 16:14:58 INFO BlockManagerInfo: Added broadcast_11_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 5.2 KiB, free: 365.7 MiB) | |
23/05/17 16:14:58 INFO SparkContext: Created broadcast 11 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:14:58 INFO DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 14 (MapPartitionsRDD[45] at countByKey at HoodieJavaPairRDD.java:105) (first 15 tasks are for partitions Vector(0, 1)) | |
23/05/17 16:14:58 INFO TaskSchedulerImpl: Adding task set 14.0 with 2 tasks resource profile 0 | |
23/05/17 16:14:58 INFO TaskSetManager: Starting task 0.0 in stage 14.0 (TID 16) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4323 bytes) taskResourceAssignments Map() | |
23/05/17 16:14:58 INFO Executor: Running task 0.0 in stage 14.0 (TID 16) | |
23/05/17 16:14:58 INFO ShuffleBlockFetcherIterator: Getting 1 (3.5 KiB) non-empty blocks including 1 (3.5 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
23/05/17 16:14:58 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
23/05/17 16:14:58 INFO ShuffleBlockFetcherIterator: Getting 0 (0.0 B) non-empty blocks including 0 (0.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
23/05/17 16:14:58 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
23/05/17 16:14:59 INFO MemoryStore: Block rdd_43_0 stored as values in memory (estimated size 8.2 KiB, free 363.9 MiB) | |
23/05/17 16:14:59 INFO BlockManagerInfo: Added rdd_43_0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 8.2 KiB, free: 365.7 MiB) | |
23/05/17 16:14:59 INFO Executor: Finished task 0.0 in stage 14.0 (TID 16). 1425 bytes result sent to driver | |
23/05/17 16:14:59 INFO TaskSetManager: Starting task 1.0 in stage 14.0 (TID 17) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, PROCESS_LOCAL, 4323 bytes) taskResourceAssignments Map() | |
23/05/17 16:14:59 INFO Executor: Running task 1.0 in stage 14.0 (TID 17) | |
23/05/17 16:14:59 INFO TaskSetManager: Finished task 0.0 in stage 14.0 (TID 16) in 38 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
23/05/17 16:14:59 INFO ShuffleBlockFetcherIterator: Getting 1 (2.4 KiB) non-empty blocks including 1 (2.4 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
23/05/17 16:14:59 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
23/05/17 16:14:59 INFO ShuffleBlockFetcherIterator: Getting 0 (0.0 B) non-empty blocks including 0 (0.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
23/05/17 16:14:59 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
23/05/17 16:14:59 INFO MemoryStore: Block rdd_43_1 stored as values in memory (estimated size 5.8 KiB, free 363.9 MiB) | |
23/05/17 16:14:59 INFO BlockManagerInfo: Added rdd_43_1 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 5.8 KiB, free: 365.7 MiB) | |
23/05/17 16:14:59 INFO Executor: Finished task 1.0 in stage 14.0 (TID 17). 1425 bytes result sent to driver | |
23/05/17 16:14:59 INFO TaskSetManager: Finished task 1.0 in stage 14.0 (TID 17) in 23 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
23/05/17 16:14:59 INFO TaskSchedulerImpl: Removed TaskSet 14.0, whose tasks have all completed, from pool | |
23/05/17 16:14:59 INFO DAGScheduler: ShuffleMapStage 14 (countByKey at HoodieJavaPairRDD.java:105) finished in 0.071 s | |
23/05/17 16:14:59 INFO DAGScheduler: looking for newly runnable stages | |
23/05/17 16:14:59 INFO DAGScheduler: running: Set() | |
23/05/17 16:14:59 INFO DAGScheduler: waiting: Set(ResultStage 15) | |
23/05/17 16:14:59 INFO DAGScheduler: failed: Set() | |
23/05/17 16:14:59 INFO DAGScheduler: Submitting ResultStage 15 (ShuffledRDD[46] at countByKey at HoodieJavaPairRDD.java:105), which has no missing parents | |
23/05/17 16:14:59 INFO MemoryStore: Block broadcast_12 stored as values in memory (estimated size 5.5 KiB, free 363.9 MiB) | |
23/05/17 16:14:59 INFO MemoryStore: Block broadcast_12_piece0 stored as bytes in memory (estimated size 3.2 KiB, free 363.9 MiB) | |
23/05/17 16:14:59 INFO BlockManagerInfo: Added broadcast_12_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 3.2 KiB, free: 365.7 MiB) | |
23/05/17 16:14:59 INFO SparkContext: Created broadcast 12 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:14:59 INFO DAGScheduler: Submitting 2 missing tasks from ResultStage 15 (ShuffledRDD[46] at countByKey at HoodieJavaPairRDD.java:105) (first 15 tasks are for partitions Vector(0, 1)) | |
23/05/17 16:14:59 INFO TaskSchedulerImpl: Adding task set 15.0 with 2 tasks resource profile 0 | |
23/05/17 16:14:59 INFO TaskSetManager: Starting task 1.0 in stage 15.0 (TID 18) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, NODE_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
23/05/17 16:14:59 INFO Executor: Running task 1.0 in stage 15.0 (TID 18) | |
23/05/17 16:14:59 INFO ShuffleBlockFetcherIterator: Getting 2 (194.0 B) non-empty blocks including 2 (194.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
23/05/17 16:14:59 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
23/05/17 16:14:59 INFO Executor: Finished task 1.0 in stage 15.0 (TID 18). 1292 bytes result sent to driver | |
23/05/17 16:14:59 INFO TaskSetManager: Starting task 0.0 in stage 15.0 (TID 19) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
23/05/17 16:14:59 INFO Executor: Running task 0.0 in stage 15.0 (TID 19) | |
23/05/17 16:14:59 INFO TaskSetManager: Finished task 1.0 in stage 15.0 (TID 18) in 11 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
23/05/17 16:14:59 INFO ShuffleBlockFetcherIterator: Getting 0 (0.0 B) non-empty blocks including 0 (0.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
23/05/17 16:14:59 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
23/05/17 16:14:59 INFO Executor: Finished task 0.0 in stage 15.0 (TID 19). 1235 bytes result sent to driver | |
23/05/17 16:14:59 INFO TaskSetManager: Finished task 0.0 in stage 15.0 (TID 19) in 7 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
23/05/17 16:14:59 INFO TaskSchedulerImpl: Removed TaskSet 15.0, whose tasks have all completed, from pool | |
23/05/17 16:14:59 INFO DAGScheduler: ResultStage 15 (countByKey at HoodieJavaPairRDD.java:105) finished in 0.024 s | |
23/05/17 16:14:59 INFO DAGScheduler: Job 10 is finished. Cancelling potential speculative or zombie tasks for this job | |
23/05/17 16:14:59 INFO TaskSchedulerImpl: Killing all running tasks in stage 15: Stage finished | |
23/05/17 16:14:59 INFO DAGScheduler: Job 10 finished: countByKey at HoodieJavaPairRDD.java:105, took 0.185290 s | |
23/05/17 16:14:59 INFO BaseSparkCommitActionExecutor: Input workload profile :WorkloadProfile {globalStat=WorkloadStat {numInserts=99, numUpdates=0}, InputPartitionStat={2018/08/31=WorkloadStat {numInserts=99, numUpdates=0}}, OutputPartitionStat={}, operationType=UPSERT} | |
23/05/17 16:14:59 INFO UpsertPartitioner: AvgRecordSize => 1024 | |
23/05/17 16:14:59 INFO SparkContext: Starting job: collectAsMap at UpsertPartitioner.java:279 | |
23/05/17 16:14:59 INFO DAGScheduler: Got job 11 (collectAsMap at UpsertPartitioner.java:279) with 1 output partitions | |
23/05/17 16:14:59 INFO DAGScheduler: Final stage: ResultStage 16 (collectAsMap at UpsertPartitioner.java:279) | |
23/05/17 16:14:59 INFO DAGScheduler: Parents of final stage: List() | |
23/05/17 16:14:59 INFO DAGScheduler: Missing parents: List() | |
23/05/17 16:14:59 INFO DAGScheduler: Submitting ResultStage 16 (MapPartitionsRDD[48] at mapToPair at UpsertPartitioner.java:278), which has no missing parents | |
23/05/17 16:14:59 INFO MemoryStore: Block broadcast_13 stored as values in memory (estimated size 643.2 KiB, free 363.3 MiB) | |
23/05/17 16:14:59 INFO MemoryStore: Block broadcast_13_piece0 stored as bytes in memory (estimated size 240.9 KiB, free 363.0 MiB) | |
23/05/17 16:14:59 INFO BlockManagerInfo: Added broadcast_13_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 240.9 KiB, free: 365.5 MiB) | |
23/05/17 16:14:59 INFO SparkContext: Created broadcast 13 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:14:59 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 16 (MapPartitionsRDD[48] at mapToPair at UpsertPartitioner.java:278) (first 15 tasks are for partitions Vector(0)) | |
23/05/17 16:14:59 INFO TaskSchedulerImpl: Adding task set 16.0 with 1 tasks resource profile 0 | |
23/05/17 16:14:59 INFO TaskSetManager: Starting task 0.0 in stage 16.0 (TID 20) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4344 bytes) taskResourceAssignments Map() | |
23/05/17 16:14:59 INFO Executor: Running task 0.0 in stage 16.0 (TID 20) | |
23/05/17 16:14:59 INFO BlockManagerInfo: Removed broadcast_7_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 240.4 KiB, free: 365.7 MiB) | |
23/05/17 16:14:59 INFO Executor: Finished task 0.0 in stage 16.0 (TID 20). 928 bytes result sent to driver | |
23/05/17 16:14:59 INFO TaskSetManager: Finished task 0.0 in stage 16.0 (TID 20) in 93 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
23/05/17 16:14:59 INFO TaskSchedulerImpl: Removed TaskSet 16.0, whose tasks have all completed, from pool | |
23/05/17 16:14:59 INFO DAGScheduler: ResultStage 16 (collectAsMap at UpsertPartitioner.java:279) finished in 0.147 s | |
23/05/17 16:14:59 INFO DAGScheduler: Job 11 is finished. Cancelling potential speculative or zombie tasks for this job | |
23/05/17 16:14:59 INFO TaskSchedulerImpl: Killing all running tasks in stage 16: Stage finished | |
23/05/17 16:14:59 INFO DAGScheduler: Job 11 finished: collectAsMap at UpsertPartitioner.java:279, took 0.149930 s | |
23/05/17 16:14:59 INFO BlockManagerInfo: Removed broadcast_11_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 5.2 KiB, free: 365.7 MiB) | |
23/05/17 16:14:59 INFO AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:14:59 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:14:59 INFO UpsertPartitioner: For partitionPath : 2018/08/31 Small Files => [] | |
23/05/17 16:14:59 INFO UpsertPartitioner: After small file assignment: unassignedInserts => 99, totalInsertBuckets => 1, recordsPerBucket => 122880 | |
23/05/17 16:14:59 INFO UpsertPartitioner: Total insert buckets for partition path 2018/08/31 => [(InsertBucket {bucketNumber=0, weight=1.0},1.0)] | |
23/05/17 16:14:59 INFO UpsertPartitioner: Total Buckets :1, buckets info => {0=BucketInfo {bucketType=INSERT, fileIdPrefix=7f63e431-8db7-4efa-9b96-5fee5648f6ed, partitionPath=2018/08/31}}, | |
Partition to insert buckets => {2018/08/31=[(InsertBucket {bucketNumber=0, weight=1.0},1.0)]}, | |
UpdateLocations mapped to buckets =>{} | |
23/05/17 16:14:59 INFO BlockManagerInfo: Removed broadcast_10_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 12.1 KiB, free: 365.7 MiB) | |
23/05/17 16:14:59 INFO HoodieActiveTimeline: Checking for file exists ?/tmp/deltastreamertest/stocks20230517t161102/.hoodie/20230517161454289.commit.requested | |
23/05/17 16:14:59 INFO BlockManagerInfo: Removed broadcast_12_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 3.2 KiB, free: 365.7 MiB) | |
23/05/17 16:14:59 INFO BlockManagerInfo: Removed broadcast_6_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 240.4 KiB, free: 366.0 MiB) | |
23/05/17 16:14:59 INFO HoodieActiveTimeline: Create new file for toInstant ?/tmp/deltastreamertest/stocks20230517t161102/.hoodie/20230517161454289.inflight | |
23/05/17 16:14:59 INFO BlockManagerInfo: Removed broadcast_9_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 3.2 KiB, free: 366.0 MiB) | |
23/05/17 16:14:59 INFO BlockManagerInfo: Removed broadcast_8_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 12.8 KiB, free: 366.0 MiB) | |
23/05/17 16:14:59 INFO BaseSparkCommitActionExecutor: no validators configured. | |
23/05/17 16:14:59 INFO BaseCommitActionExecutor: Auto commit disabled for 20230517161454289 | |
23/05/17 16:14:59 INFO SparkContext: Starting job: sum at DeltaSync.java:696 | |
23/05/17 16:14:59 INFO DAGScheduler: Registering RDD 49 (mapToPair at HoodieJavaRDD.java:135) as input to shuffle 9 | |
23/05/17 16:14:59 INFO DAGScheduler: Got job 12 (sum at DeltaSync.java:696) with 1 output partitions | |
23/05/17 16:14:59 INFO DAGScheduler: Final stage: ResultStage 22 (sum at DeltaSync.java:696) | |
23/05/17 16:14:59 INFO DAGScheduler: Parents of final stage: List(ShuffleMapStage 21) | |
23/05/17 16:14:59 INFO DAGScheduler: Missing parents: List(ShuffleMapStage 21) | |
23/05/17 16:14:59 INFO DAGScheduler: Submitting ShuffleMapStage 21 (MapPartitionsRDD[49] at mapToPair at HoodieJavaRDD.java:135), which has no missing parents | |
23/05/17 16:14:59 INFO MemoryStore: Block broadcast_14 stored as values in memory (estimated size 648.5 KiB, free 364.2 MiB) | |
23/05/17 16:14:59 INFO MemoryStore: Block broadcast_14_piece0 stored as bytes in memory (estimated size 243.2 KiB, free 364.0 MiB) | |
23/05/17 16:14:59 INFO BlockManagerInfo: Added broadcast_14_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 243.2 KiB, free: 365.8 MiB) | |
23/05/17 16:14:59 INFO SparkContext: Created broadcast 14 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:14:59 INFO DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 21 (MapPartitionsRDD[49] at mapToPair at HoodieJavaRDD.java:135) (first 15 tasks are for partitions Vector(0, 1)) | |
23/05/17 16:14:59 INFO TaskSchedulerImpl: Adding task set 21.0 with 2 tasks resource profile 0 | |
23/05/17 16:14:59 INFO TaskSetManager: Starting task 0.0 in stage 21.0 (TID 21) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4323 bytes) taskResourceAssignments Map() | |
23/05/17 16:14:59 INFO Executor: Running task 0.0 in stage 21.0 (TID 21) | |
23/05/17 16:14:59 INFO BlockManager: Found block rdd_43_0 locally | |
23/05/17 16:14:59 INFO Executor: Finished task 0.0 in stage 21.0 (TID 21). 1080 bytes result sent to driver | |
23/05/17 16:14:59 INFO TaskSetManager: Starting task 1.0 in stage 21.0 (TID 22) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 1, PROCESS_LOCAL, 4323 bytes) taskResourceAssignments Map() | |
23/05/17 16:14:59 INFO Executor: Running task 1.0 in stage 21.0 (TID 22) | |
23/05/17 16:14:59 INFO TaskSetManager: Finished task 0.0 in stage 21.0 (TID 21) in 40 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/2) | |
23/05/17 16:14:59 INFO BlockManager: Found block rdd_43_1 locally | |
23/05/17 16:14:59 INFO Executor: Finished task 1.0 in stage 21.0 (TID 22). 1080 bytes result sent to driver | |
23/05/17 16:14:59 INFO TaskSetManager: Finished task 1.0 in stage 21.0 (TID 22) in 31 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (2/2) | |
23/05/17 16:14:59 INFO TaskSchedulerImpl: Removed TaskSet 21.0, whose tasks have all completed, from pool | |
23/05/17 16:14:59 INFO DAGScheduler: ShuffleMapStage 21 (mapToPair at HoodieJavaRDD.java:135) finished in 0.120 s | |
23/05/17 16:14:59 INFO DAGScheduler: looking for newly runnable stages | |
23/05/17 16:14:59 INFO DAGScheduler: running: Set() | |
23/05/17 16:14:59 INFO DAGScheduler: waiting: Set(ResultStage 22) | |
23/05/17 16:14:59 INFO DAGScheduler: failed: Set() | |
23/05/17 16:14:59 INFO DAGScheduler: Submitting ResultStage 22 (MapPartitionsRDD[54] at mapToDouble at DeltaSync.java:696), which has no missing parents | |
23/05/17 16:14:59 INFO MemoryStore: Block broadcast_15 stored as values in memory (estimated size 656.0 KiB, free 363.4 MiB) | |
23/05/17 16:14:59 INFO MemoryStore: Block broadcast_15_piece0 stored as bytes in memory (estimated size 247.5 KiB, free 363.1 MiB) | |
23/05/17 16:14:59 INFO BlockManagerInfo: Added broadcast_15_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 247.5 KiB, free: 365.5 MiB) | |
23/05/17 16:14:59 INFO SparkContext: Created broadcast 15 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:14:59 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 22 (MapPartitionsRDD[54] at mapToDouble at DeltaSync.java:696) (first 15 tasks are for partitions Vector(0)) | |
23/05/17 16:14:59 INFO TaskSchedulerImpl: Adding task set 22.0 with 1 tasks resource profile 0 | |
23/05/17 16:14:59 INFO TaskSetManager: Starting task 0.0 in stage 22.0 (TID 23) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, NODE_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
23/05/17 16:14:59 INFO Executor: Running task 0.0 in stage 22.0 (TID 23) | |
23/05/17 16:14:59 INFO ShuffleBlockFetcherIterator: Getting 2 (6.2 KiB) non-empty blocks including 2 (6.2 KiB) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
23/05/17 16:14:59 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 1 ms | |
23/05/17 16:14:59 INFO SimpleExecutor: Starting consumer, consuming records from the records iterator directly | |
23/05/17 16:14:59 WARN WriteMarkersFactory: Timeline-server-based markers are not supported for HDFS: base path /tmp/deltastreamertest/stocks20230517t161102. Falling back to direct markers. | |
23/05/17 16:14:59 INFO DirectWriteMarkers: Creating Marker Path=/tmp/deltastreamertest/stocks20230517t161102/.hoodie/.temp/20230517161454289/2018/08/31/7f63e431-8db7-4efa-9b96-5fee5648f6ed-0_0-22-23_20230517161454289.parquet.marker.CREATE | |
23/05/17 16:14:59 INFO DirectWriteMarkers: [direct] Created marker file /tmp/deltastreamertest/stocks20230517t161102/.hoodie/.temp/20230517161454289/2018/08/31/7f63e431-8db7-4efa-9b96-5fee5648f6ed-0_0-22-23_20230517161454289.parquet.marker.CREATE in 11 ms | |
23/05/17 16:14:59 INFO CodecPool: Got brand-new compressor [.gz] | |
23/05/17 16:15:00 INFO HoodieCreateHandle: New CreateHandle for partition :2018/08/31 with fileId 7f63e431-8db7-4efa-9b96-5fee5648f6ed-0 | |
23/05/17 16:15:00 INFO HoodieCreateHandle: Closing the file 7f63e431-8db7-4efa-9b96-5fee5648f6ed-0 as we are done with all the records 99 | |
23/05/17 16:15:00 INFO BlockManagerInfo: Removed broadcast_14_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 243.2 KiB, free: 365.8 MiB) | |
23/05/17 16:15:00 INFO BlockManagerInfo: Removed broadcast_13_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 240.9 KiB, free: 366.0 MiB) | |
23/05/17 16:15:00 INFO HoodieCreateHandle: CreateHandle for partitionPath 2018/08/31 fileID 7f63e431-8db7-4efa-9b96-5fee5648f6ed-0, took 833 ms. | |
23/05/17 16:15:00 INFO MemoryStore: Block rdd_53_0 stored as values in memory (estimated size 376.0 B, free 364.9 MiB) | |
23/05/17 16:15:00 INFO BlockManagerInfo: Added rdd_53_0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 376.0 B, free: 366.0 MiB) | |
23/05/17 16:15:00 INFO Executor: Finished task 0.0 in stage 22.0 (TID 23). 1154 bytes result sent to driver | |
23/05/17 16:15:00 INFO TaskSetManager: Finished task 0.0 in stage 22.0 (TID 23) in 901 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
23/05/17 16:15:00 INFO TaskSchedulerImpl: Removed TaskSet 22.0, whose tasks have all completed, from pool | |
23/05/17 16:15:00 INFO DAGScheduler: ResultStage 22 (sum at DeltaSync.java:696) finished in 0.962 s | |
23/05/17 16:15:00 INFO DAGScheduler: Job 12 is finished. Cancelling potential speculative or zombie tasks for this job | |
23/05/17 16:15:00 INFO TaskSchedulerImpl: Killing all running tasks in stage 22: Stage finished | |
23/05/17 16:15:00 INFO DAGScheduler: Job 12 finished: sum at DeltaSync.java:696, took 1.091221 s | |
23/05/17 16:15:00 INFO SparkContext: Starting job: sum at DeltaSync.java:697 | |
23/05/17 16:15:00 INFO DAGScheduler: Got job 13 (sum at DeltaSync.java:697) with 1 output partitions | |
23/05/17 16:15:00 INFO DAGScheduler: Final stage: ResultStage 28 (sum at DeltaSync.java:697) | |
23/05/17 16:15:00 INFO DAGScheduler: Parents of final stage: List(ShuffleMapStage 27) | |
23/05/17 16:15:00 INFO DAGScheduler: Missing parents: List() | |
23/05/17 16:15:00 INFO DAGScheduler: Submitting ResultStage 28 (MapPartitionsRDD[56] at mapToDouble at DeltaSync.java:697), which has no missing parents | |
23/05/17 16:15:00 INFO MemoryStore: Block broadcast_16 stored as values in memory (estimated size 656.0 KiB, free 364.2 MiB) | |
23/05/17 16:15:00 INFO MemoryStore: Block broadcast_16_piece0 stored as bytes in memory (estimated size 247.4 KiB, free 364.0 MiB) | |
23/05/17 16:15:00 INFO BlockManagerInfo: Added broadcast_16_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 247.4 KiB, free: 365.8 MiB) | |
23/05/17 16:15:00 INFO SparkContext: Created broadcast 16 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:15:00 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 28 (MapPartitionsRDD[56] at mapToDouble at DeltaSync.java:697) (first 15 tasks are for partitions Vector(0)) | |
23/05/17 16:15:00 INFO TaskSchedulerImpl: Adding task set 28.0 with 1 tasks resource profile 0 | |
23/05/17 16:15:00 INFO TaskSetManager: Starting task 0.0 in stage 28.0 (TID 24) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
23/05/17 16:15:00 INFO Executor: Running task 0.0 in stage 28.0 (TID 24) | |
23/05/17 16:15:00 INFO BlockManager: Found block rdd_53_0 locally | |
23/05/17 16:15:00 INFO Executor: Finished task 0.0 in stage 28.0 (TID 24). 896 bytes result sent to driver | |
23/05/17 16:15:00 INFO TaskSetManager: Finished task 0.0 in stage 28.0 (TID 24) in 60 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
23/05/17 16:15:00 INFO TaskSchedulerImpl: Removed TaskSet 28.0, whose tasks have all completed, from pool | |
23/05/17 16:15:00 INFO DAGScheduler: ResultStage 28 (sum at DeltaSync.java:697) finished in 0.117 s | |
23/05/17 16:15:00 INFO DAGScheduler: Job 13 is finished. Cancelling potential speculative or zombie tasks for this job | |
23/05/17 16:15:00 INFO TaskSchedulerImpl: Killing all running tasks in stage 28: Stage finished | |
23/05/17 16:15:00 INFO DAGScheduler: Job 13 finished: sum at DeltaSync.java:697, took 0.123488 s | |
23/05/17 16:15:00 INFO SparkContext: Starting job: collect at SparkRDDWriteClient.java:101 | |
23/05/17 16:15:00 INFO DAGScheduler: Got job 14 (collect at SparkRDDWriteClient.java:101) with 1 output partitions | |
23/05/17 16:15:00 INFO DAGScheduler: Final stage: ResultStage 34 (collect at SparkRDDWriteClient.java:101) | |
23/05/17 16:15:00 INFO DAGScheduler: Parents of final stage: List(ShuffleMapStage 33) | |
23/05/17 16:15:00 INFO DAGScheduler: Missing parents: List() | |
23/05/17 16:15:00 INFO DAGScheduler: Submitting ResultStage 34 (MapPartitionsRDD[58] at map at SparkRDDWriteClient.java:101), which has no missing parents | |
23/05/17 16:15:00 INFO MemoryStore: Block broadcast_17 stored as values in memory (estimated size 656.2 KiB, free 363.3 MiB) | |
23/05/17 16:15:00 INFO MemoryStore: Block broadcast_17_piece0 stored as bytes in memory (estimated size 247.5 KiB, free 363.1 MiB) | |
23/05/17 16:15:00 INFO BlockManagerInfo: Added broadcast_17_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 247.5 KiB, free: 365.5 MiB) | |
23/05/17 16:15:00 INFO SparkContext: Created broadcast 17 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:15:00 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 34 (MapPartitionsRDD[58] at map at SparkRDDWriteClient.java:101) (first 15 tasks are for partitions Vector(0)) | |
23/05/17 16:15:00 INFO TaskSchedulerImpl: Adding task set 34.0 with 1 tasks resource profile 0 | |
23/05/17 16:15:00 INFO TaskSetManager: Starting task 0.0 in stage 34.0 (TID 25) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
23/05/17 16:15:00 INFO Executor: Running task 0.0 in stage 34.0 (TID 25) | |
23/05/17 16:15:00 INFO BlockManager: Found block rdd_53_0 locally | |
23/05/17 16:15:00 INFO Executor: Finished task 0.0 in stage 34.0 (TID 25). 1127 bytes result sent to driver | |
23/05/17 16:15:00 INFO TaskSetManager: Finished task 0.0 in stage 34.0 (TID 25) in 40 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
23/05/17 16:15:00 INFO TaskSchedulerImpl: Removed TaskSet 34.0, whose tasks have all completed, from pool | |
23/05/17 16:15:00 INFO DAGScheduler: ResultStage 34 (collect at SparkRDDWriteClient.java:101) finished in 0.089 s | |
23/05/17 16:15:00 INFO DAGScheduler: Job 14 is finished. Cancelling potential speculative or zombie tasks for this job | |
23/05/17 16:15:00 INFO TaskSchedulerImpl: Killing all running tasks in stage 34: Stage finished | |
23/05/17 16:15:00 INFO DAGScheduler: Job 14 finished: collect at SparkRDDWriteClient.java:101, took 0.094309 s | |
23/05/17 16:15:00 INFO BaseHoodieWriteClient: Committing 20230517161454289 action commit | |
23/05/17 16:15:00 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:00 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:15:00 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:00 INFO HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:00 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517161454289__commit__INFLIGHT]} | |
23/05/17 16:15:00 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:00 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:15:00 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:00 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:00 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:15:00 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:00 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
23/05/17 16:15:00 INFO AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:15:00 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:15:00 INFO FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
23/05/17 16:15:00 INFO FileSystemViewManager: Creating remote first table view | |
23/05/17 16:15:00 INFO CommitUtils: Creating metadata for UPSERT numWriteStats:1 numReplaceFileIds:0 | |
23/05/17 16:15:00 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:00 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:01 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517161454289__commit__INFLIGHT]} | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:01 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
23/05/17 16:15:01 INFO AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:15:01 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:15:01 INFO FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
23/05/17 16:15:01 INFO FileSystemViewManager: Creating remote first table view | |
23/05/17 16:15:01 INFO BaseHoodieWriteClient: Committing 20230517161454289 action commit | |
23/05/17 16:15:01 WARN WriteMarkersFactory: Timeline-server-based markers are not supported for HDFS: base path /tmp/deltastreamertest/stocks20230517t161102. Falling back to direct markers. | |
23/05/17 16:15:01 INFO SparkContext: Starting job: collect at HoodieSparkEngineContext.java:137 | |
23/05/17 16:15:01 INFO DAGScheduler: Got job 15 (collect at HoodieSparkEngineContext.java:137) with 1 output partitions | |
23/05/17 16:15:01 INFO DAGScheduler: Final stage: ResultStage 35 (collect at HoodieSparkEngineContext.java:137) | |
23/05/17 16:15:01 INFO DAGScheduler: Parents of final stage: List() | |
23/05/17 16:15:01 INFO DAGScheduler: Missing parents: List() | |
23/05/17 16:15:01 INFO DAGScheduler: Submitting ResultStage 35 (MapPartitionsRDD[60] at flatMap at HoodieSparkEngineContext.java:137), which has no missing parents | |
23/05/17 16:15:01 INFO MemoryStore: Block broadcast_18 stored as values in memory (estimated size 149.6 KiB, free 363.0 MiB) | |
23/05/17 16:15:01 INFO MemoryStore: Block broadcast_18_piece0 stored as bytes in memory (estimated size 56.8 KiB, free 362.9 MiB) | |
23/05/17 16:15:01 INFO BlockManagerInfo: Added broadcast_18_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 56.8 KiB, free: 365.5 MiB) | |
23/05/17 16:15:01 INFO SparkContext: Created broadcast 18 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:15:01 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 35 (MapPartitionsRDD[60] at flatMap at HoodieSparkEngineContext.java:137) (first 15 tasks are for partitions Vector(0)) | |
23/05/17 16:15:01 INFO TaskSchedulerImpl: Adding task set 35.0 with 1 tasks resource profile 0 | |
23/05/17 16:15:01 INFO TaskSetManager: Starting task 0.0 in stage 35.0 (TID 26) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4471 bytes) taskResourceAssignments Map() | |
23/05/17 16:15:01 INFO Executor: Running task 0.0 in stage 35.0 (TID 26) | |
23/05/17 16:15:01 INFO Executor: Finished task 0.0 in stage 35.0 (TID 26). 892 bytes result sent to driver | |
23/05/17 16:15:01 INFO TaskSetManager: Finished task 0.0 in stage 35.0 (TID 26) in 18 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
23/05/17 16:15:01 INFO TaskSchedulerImpl: Removed TaskSet 35.0, whose tasks have all completed, from pool | |
23/05/17 16:15:01 INFO DAGScheduler: ResultStage 35 (collect at HoodieSparkEngineContext.java:137) finished in 0.036 s | |
23/05/17 16:15:01 INFO DAGScheduler: Job 15 is finished. Cancelling potential speculative or zombie tasks for this job | |
23/05/17 16:15:01 INFO TaskSchedulerImpl: Killing all running tasks in stage 35: Stage finished | |
23/05/17 16:15:01 INFO DAGScheduler: Job 15 finished: collect at HoodieSparkEngineContext.java:137, took 0.039482 s | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:01 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
23/05/17 16:15:01 INFO AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:15:01 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:15:01 INFO HoodieTableMetadataUtil: Loading latest file slices for metadata table partition files | |
23/05/17 16:15:01 INFO AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:15:01 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:15:01 INFO AbstractTableFileSystemView: Building file system view for partition (files) | |
23/05/17 16:15:01 INFO AbstractTableFileSystemView: addFilesToView: NumFiles=1, NumFileGroups=1, FileGroupsCreationTime=4, StoreTimeTaken=1 | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
23/05/17 16:15:01 INFO HoodieBackedTableMetadataWriter: Async metadata indexing enabled and following partitions already initialized: [files] | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:01 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
23/05/17 16:15:01 INFO AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:15:01 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:15:01 INFO HoodieTableMetadataUtil: Updating at 20230517161454289 from Commit/UPSERT. #partitions_updated=2 | |
23/05/17 16:15:01 INFO AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:15:01 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:15:01 INFO HoodieTableMetadataUtil: Loading latest file slices for metadata table partition files | |
23/05/17 16:15:01 INFO AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:15:01 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:15:01 INFO AbstractTableFileSystemView: Building file system view for partition (files) | |
23/05/17 16:15:01 INFO AbstractTableFileSystemView: addFilesToView: NumFiles=1, NumFileGroups=1, FileGroupsCreationTime=1, StoreTimeTaken=0 | |
23/05/17 16:15:01 INFO BaseHoodieClient: Embedded Timeline Server is disabled. Not starting timeline service | |
23/05/17 16:15:01 INFO BaseHoodieClient: Embedded Timeline Server is disabled. Not starting timeline service | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
23/05/17 16:15:01 INFO FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
23/05/17 16:15:01 INFO FileSystemViewManager: Creating in-memory based Table View | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
23/05/17 16:15:01 INFO FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
23/05/17 16:15:01 INFO FileSystemViewManager: Creating in-memory based Table View | |
23/05/17 16:15:01 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
23/05/17 16:15:01 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517161454289__commit__INFLIGHT]} | |
23/05/17 16:15:01 INFO BaseHoodieWriteClient: Scheduling table service COMPACT | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
23/05/17 16:15:01 INFO FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
23/05/17 16:15:01 INFO FileSystemViewManager: Creating in-memory based Table View | |
23/05/17 16:15:01 INFO BaseHoodieWriteClient: Scheduling compaction at instant time :00000000000000001 | |
23/05/17 16:15:01 INFO ScheduleCompactionActionExecutor: Checking if compaction needs to be run on /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
23/05/17 16:15:01 INFO CleanerUtils: Cleaned failed attempts if any | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[00000000000000__deltacommit__COMPLETED]} | |
23/05/17 16:15:01 INFO FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
23/05/17 16:15:01 INFO FileSystemViewManager: Creating in-memory based Table View | |
23/05/17 16:15:01 INFO BaseHoodieWriteClient: Generate a new instant time: 20230517161454289 action: deltacommit | |
23/05/17 16:15:01 INFO HoodieActiveTimeline: Creating a new instant [==>20230517161454289__deltacommit__REQUESTED] | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517161454289__deltacommit__REQUESTED]} | |
23/05/17 16:15:01 INFO FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
23/05/17 16:15:01 INFO FileSystemViewManager: Creating in-memory based Table View | |
23/05/17 16:15:01 INFO AsyncCleanerService: The HoodieWriteClient is not configured to auto & async clean. Async clean service will not start. | |
23/05/17 16:15:01 INFO AsyncArchiveService: The HoodieWriteClient is not configured to auto & async archive. Async archive service will not start. | |
23/05/17 16:15:01 INFO AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:15:01 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:15:01 INFO SparkContext: Starting job: countByKey at HoodieJavaPairRDD.java:105 | |
23/05/17 16:15:01 INFO DAGScheduler: Registering RDD 66 (countByKey at HoodieJavaPairRDD.java:105) as input to shuffle 10 | |
23/05/17 16:15:01 INFO DAGScheduler: Got job 16 (countByKey at HoodieJavaPairRDD.java:105) with 1 output partitions | |
23/05/17 16:15:01 INFO DAGScheduler: Final stage: ResultStage 37 (countByKey at HoodieJavaPairRDD.java:105) | |
23/05/17 16:15:01 INFO DAGScheduler: Parents of final stage: List(ShuffleMapStage 36) | |
23/05/17 16:15:01 INFO DAGScheduler: Missing parents: List(ShuffleMapStage 36) | |
23/05/17 16:15:01 INFO DAGScheduler: Submitting ShuffleMapStage 36 (MapPartitionsRDD[66] at countByKey at HoodieJavaPairRDD.java:105), which has no missing parents | |
23/05/17 16:15:01 INFO MemoryStore: Block broadcast_19 stored as values in memory (estimated size 9.6 KiB, free 362.9 MiB) | |
23/05/17 16:15:01 INFO MemoryStore: Block broadcast_19_piece0 stored as bytes in memory (estimated size 5.3 KiB, free 362.9 MiB) | |
23/05/17 16:15:01 INFO BlockManagerInfo: Added broadcast_19_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 5.3 KiB, free: 365.4 MiB) | |
23/05/17 16:15:01 INFO SparkContext: Created broadcast 19 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:15:01 INFO DAGScheduler: Submitting 1 missing tasks from ShuffleMapStage 36 (MapPartitionsRDD[66] at countByKey at HoodieJavaPairRDD.java:105) (first 15 tasks are for partitions Vector(0)) | |
23/05/17 16:15:01 INFO TaskSchedulerImpl: Adding task set 36.0 with 1 tasks resource profile 0 | |
23/05/17 16:15:01 INFO TaskSetManager: Starting task 0.0 in stage 36.0 (TID 27) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4687 bytes) taskResourceAssignments Map() | |
23/05/17 16:15:01 INFO Executor: Running task 0.0 in stage 36.0 (TID 27) | |
23/05/17 16:15:01 INFO MemoryStore: Block rdd_64_0 stored as values in memory (estimated size 380.0 B, free 362.9 MiB) | |
23/05/17 16:15:01 INFO BlockManagerInfo: Added rdd_64_0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 380.0 B, free: 365.4 MiB) | |
23/05/17 16:15:01 INFO Executor: Finished task 0.0 in stage 36.0 (TID 27). 1123 bytes result sent to driver | |
23/05/17 16:15:01 INFO TaskSetManager: Finished task 0.0 in stage 36.0 (TID 27) in 23 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
23/05/17 16:15:01 INFO TaskSchedulerImpl: Removed TaskSet 36.0, whose tasks have all completed, from pool | |
23/05/17 16:15:01 INFO DAGScheduler: ShuffleMapStage 36 (countByKey at HoodieJavaPairRDD.java:105) finished in 0.032 s | |
23/05/17 16:15:01 INFO DAGScheduler: looking for newly runnable stages | |
23/05/17 16:15:01 INFO DAGScheduler: running: Set() | |
23/05/17 16:15:01 INFO DAGScheduler: waiting: Set(ResultStage 37) | |
23/05/17 16:15:01 INFO DAGScheduler: failed: Set() | |
23/05/17 16:15:01 INFO DAGScheduler: Submitting ResultStage 37 (ShuffledRDD[67] at countByKey at HoodieJavaPairRDD.java:105), which has no missing parents | |
23/05/17 16:15:01 INFO MemoryStore: Block broadcast_20 stored as values in memory (estimated size 5.5 KiB, free 362.9 MiB) | |
23/05/17 16:15:01 INFO MemoryStore: Block broadcast_20_piece0 stored as bytes in memory (estimated size 3.2 KiB, free 362.9 MiB) | |
23/05/17 16:15:01 INFO BlockManagerInfo: Added broadcast_20_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 3.2 KiB, free: 365.4 MiB) | |
23/05/17 16:15:01 INFO SparkContext: Created broadcast 20 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:15:01 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 37 (ShuffledRDD[67] at countByKey at HoodieJavaPairRDD.java:105) (first 15 tasks are for partitions Vector(0)) | |
23/05/17 16:15:01 INFO TaskSchedulerImpl: Adding task set 37.0 with 1 tasks resource profile 0 | |
23/05/17 16:15:01 INFO TaskSetManager: Starting task 0.0 in stage 37.0 (TID 28) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, NODE_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
23/05/17 16:15:01 INFO Executor: Running task 0.0 in stage 37.0 (TID 28) | |
23/05/17 16:15:01 INFO ShuffleBlockFetcherIterator: Getting 1 (117.0 B) non-empty blocks including 1 (117.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
23/05/17 16:15:01 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
23/05/17 16:15:01 INFO Executor: Finished task 0.0 in stage 37.0 (TID 28). 1311 bytes result sent to driver | |
23/05/17 16:15:01 INFO TaskSetManager: Finished task 0.0 in stage 37.0 (TID 28) in 14 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
23/05/17 16:15:01 INFO TaskSchedulerImpl: Removed TaskSet 37.0, whose tasks have all completed, from pool | |
23/05/17 16:15:01 INFO DAGScheduler: ResultStage 37 (countByKey at HoodieJavaPairRDD.java:105) finished in 0.021 s | |
23/05/17 16:15:01 INFO DAGScheduler: Job 16 is finished. Cancelling potential speculative or zombie tasks for this job | |
23/05/17 16:15:01 INFO TaskSchedulerImpl: Killing all running tasks in stage 37: Stage finished | |
23/05/17 16:15:01 INFO DAGScheduler: Job 16 finished: countByKey at HoodieJavaPairRDD.java:105, took 0.060359 s | |
23/05/17 16:15:01 INFO BaseSparkCommitActionExecutor: Input workload profile :WorkloadProfile {globalStat=WorkloadStat {numInserts=0, numUpdates=2}, InputPartitionStat={files=WorkloadStat {numInserts=0, numUpdates=2}}, OutputPartitionStat={}, operationType=UPSERT_PREPPED} | |
23/05/17 16:15:01 INFO UpsertPartitioner: AvgRecordSize => 1024 | |
23/05/17 16:15:01 INFO SparkContext: Starting job: collectAsMap at UpsertPartitioner.java:279 | |
23/05/17 16:15:01 INFO DAGScheduler: Got job 17 (collectAsMap at UpsertPartitioner.java:279) with 1 output partitions | |
23/05/17 16:15:01 INFO DAGScheduler: Final stage: ResultStage 38 (collectAsMap at UpsertPartitioner.java:279) | |
23/05/17 16:15:01 INFO DAGScheduler: Parents of final stage: List() | |
23/05/17 16:15:01 INFO DAGScheduler: Missing parents: List() | |
23/05/17 16:15:01 INFO DAGScheduler: Submitting ResultStage 38 (MapPartitionsRDD[69] at mapToPair at UpsertPartitioner.java:278), which has no missing parents | |
23/05/17 16:15:01 INFO MemoryStore: Block broadcast_21 stored as values in memory (estimated size 494.3 KiB, free 362.4 MiB) | |
23/05/17 16:15:01 INFO MemoryStore: Block broadcast_21_piece0 stored as bytes in memory (estimated size 184.5 KiB, free 362.2 MiB) | |
23/05/17 16:15:01 INFO BlockManagerInfo: Added broadcast_21_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 184.5 KiB, free: 365.3 MiB) | |
23/05/17 16:15:01 INFO SparkContext: Created broadcast 21 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:15:01 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 38 (MapPartitionsRDD[69] at mapToPair at UpsertPartitioner.java:278) (first 15 tasks are for partitions Vector(0)) | |
23/05/17 16:15:01 INFO TaskSchedulerImpl: Adding task set 38.0 with 1 tasks resource profile 0 | |
23/05/17 16:15:01 INFO TaskSetManager: Starting task 0.0 in stage 38.0 (TID 29) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4339 bytes) taskResourceAssignments Map() | |
23/05/17 16:15:01 INFO Executor: Running task 0.0 in stage 38.0 (TID 29) | |
23/05/17 16:15:01 INFO FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
23/05/17 16:15:01 INFO FileSystemViewManager: Creating in-memory based Table View | |
23/05/17 16:15:01 INFO FileSystemViewManager: Creating InMemory based view for basePath /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:01 INFO AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:15:01 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:15:01 INFO AbstractTableFileSystemView: Building file system view for partition (files) | |
23/05/17 16:15:01 INFO AbstractTableFileSystemView: addFilesToView: NumFiles=1, NumFileGroups=1, FileGroupsCreationTime=1, StoreTimeTaken=0 | |
23/05/17 16:15:01 INFO Executor: Finished task 0.0 in stage 38.0 (TID 29). 837 bytes result sent to driver | |
23/05/17 16:15:01 INFO TaskSetManager: Finished task 0.0 in stage 38.0 (TID 29) in 38 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
23/05/17 16:15:01 INFO TaskSchedulerImpl: Removed TaskSet 38.0, whose tasks have all completed, from pool | |
23/05/17 16:15:01 INFO DAGScheduler: ResultStage 38 (collectAsMap at UpsertPartitioner.java:279) finished in 0.076 s | |
23/05/17 16:15:01 INFO DAGScheduler: Job 17 is finished. Cancelling potential speculative or zombie tasks for this job | |
23/05/17 16:15:01 INFO TaskSchedulerImpl: Killing all running tasks in stage 38: Stage finished | |
23/05/17 16:15:01 INFO DAGScheduler: Job 17 finished: collectAsMap at UpsertPartitioner.java:279, took 0.079295 s | |
23/05/17 16:15:01 INFO AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:15:01 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:15:01 INFO UpsertPartitioner: Total Buckets :1, buckets info => {0=BucketInfo {bucketType=UPDATE, fileIdPrefix=files-0000, partitionPath=files}}, | |
Partition to insert buckets => {}, | |
UpdateLocations mapped to buckets =>{files-0000=0} | |
23/05/17 16:15:01 INFO HoodieActiveTimeline: Checking for file exists ?/tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/20230517161454289.deltacommit.requested | |
23/05/17 16:15:01 INFO FileIOUtils: Created a new file in meta path: /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/20230517161454289.deltacommit.inflight | |
23/05/17 16:15:02 INFO HoodieActiveTimeline: Create new file for toInstant ?/tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/20230517161454289.deltacommit.inflight | |
23/05/17 16:15:02 INFO BaseSparkCommitActionExecutor: no validators configured. | |
23/05/17 16:15:02 INFO BaseCommitActionExecutor: Auto commit enabled: Committing 20230517161454289 | |
23/05/17 16:15:02 INFO SparkContext: Starting job: collect at HoodieJavaRDD.java:163 | |
23/05/17 16:15:02 INFO DAGScheduler: Registering RDD 70 (mapToPair at HoodieJavaRDD.java:135) as input to shuffle 11 | |
23/05/17 16:15:02 INFO DAGScheduler: Got job 18 (collect at HoodieJavaRDD.java:163) with 1 output partitions | |
23/05/17 16:15:02 INFO DAGScheduler: Final stage: ResultStage 40 (collect at HoodieJavaRDD.java:163) | |
23/05/17 16:15:02 INFO DAGScheduler: Parents of final stage: List(ShuffleMapStage 39) | |
23/05/17 16:15:02 INFO DAGScheduler: Missing parents: List(ShuffleMapStage 39) | |
23/05/17 16:15:02 INFO DAGScheduler: Submitting ShuffleMapStage 39 (MapPartitionsRDD[70] at mapToPair at HoodieJavaRDD.java:135), which has no missing parents | |
23/05/17 16:15:02 INFO BlockManagerInfo: Removed broadcast_16_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 247.4 KiB, free: 365.5 MiB) | |
23/05/17 16:15:02 INFO BlockManagerInfo: Removed broadcast_18_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 56.8 KiB, free: 365.6 MiB) | |
23/05/17 16:15:02 INFO BlockManagerInfo: Removed broadcast_17_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 247.5 KiB, free: 365.8 MiB) | |
23/05/17 16:15:02 INFO MemoryStore: Block broadcast_22 stored as values in memory (estimated size 498.5 KiB, free 363.7 MiB) | |
23/05/17 16:15:02 INFO MemoryStore: Block broadcast_22_piece0 stored as bytes in memory (estimated size 185.9 KiB, free 363.5 MiB) | |
23/05/17 16:15:02 INFO BlockManagerInfo: Added broadcast_22_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 185.9 KiB, free: 365.6 MiB) | |
23/05/17 16:15:02 INFO SparkContext: Created broadcast 22 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:15:02 INFO DAGScheduler: Submitting 1 missing tasks from ShuffleMapStage 39 (MapPartitionsRDD[70] at mapToPair at HoodieJavaRDD.java:135) (first 15 tasks are for partitions Vector(0)) | |
23/05/17 16:15:02 INFO TaskSchedulerImpl: Adding task set 39.0 with 1 tasks resource profile 0 | |
23/05/17 16:15:02 INFO TaskSetManager: Starting task 0.0 in stage 39.0 (TID 30) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4687 bytes) taskResourceAssignments Map() | |
23/05/17 16:15:02 INFO Executor: Running task 0.0 in stage 39.0 (TID 30) | |
23/05/17 16:15:02 INFO BlockManagerInfo: Removed broadcast_21_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 184.5 KiB, free: 365.8 MiB) | |
23/05/17 16:15:02 INFO BlockManagerInfo: Removed broadcast_20_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 3.2 KiB, free: 365.8 MiB) | |
23/05/17 16:15:02 INFO BlockManagerInfo: Removed broadcast_15_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 247.5 KiB, free: 366.0 MiB) | |
23/05/17 16:15:02 INFO BlockManager: Found block rdd_64_0 locally | |
23/05/17 16:15:02 INFO Executor: Finished task 0.0 in stage 39.0 (TID 30). 1080 bytes result sent to driver | |
23/05/17 16:15:02 INFO TaskSetManager: Finished task 0.0 in stage 39.0 (TID 30) in 29 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
23/05/17 16:15:02 INFO TaskSchedulerImpl: Removed TaskSet 39.0, whose tasks have all completed, from pool | |
23/05/17 16:15:02 INFO DAGScheduler: ShuffleMapStage 39 (mapToPair at HoodieJavaRDD.java:135) finished in 0.075 s | |
23/05/17 16:15:02 INFO BlockManagerInfo: Removed broadcast_19_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 5.3 KiB, free: 366.1 MiB) | |
23/05/17 16:15:02 INFO DAGScheduler: looking for newly runnable stages | |
23/05/17 16:15:02 INFO DAGScheduler: running: Set() | |
23/05/17 16:15:02 INFO DAGScheduler: waiting: Set(ResultStage 40) | |
23/05/17 16:15:02 INFO DAGScheduler: failed: Set() | |
23/05/17 16:15:02 INFO DAGScheduler: Submitting ResultStage 40 (MapPartitionsRDD[75] at map at HoodieJavaRDD.java:111), which has no missing parents | |
23/05/17 16:15:02 INFO MemoryStore: Block broadcast_23 stored as values in memory (estimated size 649.4 KiB, free 364.4 MiB) | |
23/05/17 16:15:02 INFO MemoryStore: Block broadcast_23_piece0 stored as bytes in memory (estimated size 244.3 KiB, free 364.2 MiB) | |
23/05/17 16:15:02 INFO BlockManagerInfo: Added broadcast_23_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 244.3 KiB, free: 365.8 MiB) | |
23/05/17 16:15:02 INFO SparkContext: Created broadcast 23 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:15:02 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 40 (MapPartitionsRDD[75] at map at HoodieJavaRDD.java:111) (first 15 tasks are for partitions Vector(0)) | |
23/05/17 16:15:02 INFO TaskSchedulerImpl: Adding task set 40.0 with 1 tasks resource profile 0 | |
23/05/17 16:15:02 INFO TaskSetManager: Starting task 0.0 in stage 40.0 (TID 31) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, NODE_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
23/05/17 16:15:02 INFO Executor: Running task 0.0 in stage 40.0 (TID 31) | |
23/05/17 16:15:02 INFO ShuffleBlockFetcherIterator: Getting 1 (334.0 B) non-empty blocks including 1 (334.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) push-merged-local and 0 (0.0 B) remote blocks | |
23/05/17 16:15:02 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms | |
23/05/17 16:15:02 INFO BaseSparkDeltaCommitActionExecutor: Merging updates for commit 20230517161454289 for file files-0000 | |
23/05/17 16:15:02 INFO FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
23/05/17 16:15:02 INFO FileSystemViewManager: Creating in-memory based Table View | |
23/05/17 16:15:02 INFO FileSystemViewManager: Creating InMemory based view for basePath /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:02 INFO AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:15:02 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:15:02 INFO AbstractTableFileSystemView: Building file system view for partition (files) | |
23/05/17 16:15:02 INFO AbstractTableFileSystemView: addFilesToView: NumFiles=1, NumFileGroups=1, FileGroupsCreationTime=1, StoreTimeTaken=0 | |
# WARNING: Unable to attach Serviceability Agent. Unable to attach even with module exceptions: [org.apache.hudi.org.openjdk.jol.vm.sa.SASupportException: Sense failed., org.apache.hudi.org.openjdk.jol.vm.sa.SASupportException: Sense failed., org.apache.hudi.org.openjdk.jol.vm.sa.SASupportException: Sense failed.] | |
23/05/17 16:15:03 INFO DirectWriteMarkers: Creating Marker Path=/tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/.temp/20230517161454289/files/files-0000_0-40-31_00000000000000.hfile.marker.APPEND | |
23/05/17 16:15:03 INFO DirectWriteMarkers: [direct] Created marker file /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/.temp/20230517161454289/files/files-0000_0-40-31_00000000000000.hfile.marker.APPEND in 10 ms | |
23/05/17 16:15:03 INFO HoodieLogFormat$WriterBuilder: Building HoodieLogFormat Writer | |
23/05/17 16:15:03 INFO HoodieLogFormat$WriterBuilder: HoodieLogFile on path /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0 | |
23/05/17 16:15:03 INFO HoodieLogFormatWriter: HoodieLogFile{pathStr='/tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0', fileLen=124} exists. Appending to existing file | |
23/05/17 16:15:04 INFO MetricsConfig: Loaded properties from hadoop-metrics2.properties | |
23/05/17 16:15:04 INFO MetricsSystemImpl: Scheduled Metric snapshot period at 300 second(s). | |
23/05/17 16:15:04 INFO MetricsSystemImpl: HBase metrics system started | |
23/05/17 16:15:04 INFO MetricRegistries: Loaded MetricRegistries class org.apache.hudi.org.apache.hadoop.hbase.metrics.impl.MetricRegistriesImpl | |
23/05/17 16:15:04 INFO CodecPool: Got brand-new compressor [.gz] | |
23/05/17 16:15:04 INFO CodecPool: Got brand-new compressor [.gz] | |
23/05/17 16:15:04 INFO HoodieAppendHandle: AppendHandle for partitionPath files filePath files/.files-0000_00000000000000.log.1_0-0-0, took 1895 ms. | |
23/05/17 16:15:04 INFO MemoryStore: Block rdd_74_0 stored as values in memory (estimated size 381.0 B, free 364.2 MiB) | |
23/05/17 16:15:04 INFO BlockManagerInfo: Added rdd_74_0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 381.0 B, free: 365.8 MiB) | |
23/05/17 16:15:04 INFO Executor: Finished task 0.0 in stage 40.0 (TID 31). 1523 bytes result sent to driver | |
23/05/17 16:15:04 INFO TaskSetManager: Finished task 0.0 in stage 40.0 (TID 31) in 2372 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
23/05/17 16:15:04 INFO TaskSchedulerImpl: Removed TaskSet 40.0, whose tasks have all completed, from pool | |
23/05/17 16:15:04 INFO DAGScheduler: ResultStage 40 (collect at HoodieJavaRDD.java:163) finished in 2.421 s | |
23/05/17 16:15:04 INFO DAGScheduler: Job 18 is finished. Cancelling potential speculative or zombie tasks for this job | |
23/05/17 16:15:04 INFO TaskSchedulerImpl: Killing all running tasks in stage 40: Stage finished | |
23/05/17 16:15:04 INFO DAGScheduler: Job 18 finished: collect at HoodieJavaRDD.java:163, took 2.502709 s | |
23/05/17 16:15:04 INFO CommitUtils: Creating metadata for UPSERT_PREPPED numWriteStats:1 numReplaceFileIds:0 | |
23/05/17 16:15:04 INFO BlockManagerInfo: Removed broadcast_22_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 185.9 KiB, free: 366.0 MiB) | |
23/05/17 16:15:04 INFO SparkContext: Starting job: collect at HoodieJavaRDD.java:163 | |
23/05/17 16:15:04 INFO DAGScheduler: Got job 19 (collect at HoodieJavaRDD.java:163) with 1 output partitions | |
23/05/17 16:15:04 INFO DAGScheduler: Final stage: ResultStage 42 (collect at HoodieJavaRDD.java:163) | |
23/05/17 16:15:04 INFO DAGScheduler: Parents of final stage: List(ShuffleMapStage 41) | |
23/05/17 16:15:04 INFO DAGScheduler: Missing parents: List() | |
23/05/17 16:15:04 INFO DAGScheduler: Submitting ResultStage 42 (MapPartitionsRDD[76] at map at HoodieJavaRDD.java:111), which has no missing parents | |
23/05/17 16:15:05 INFO MemoryStore: Block broadcast_24 stored as values in memory (estimated size 649.4 KiB, free 364.2 MiB) | |
23/05/17 16:15:05 INFO MemoryStore: Block broadcast_24_piece0 stored as bytes in memory (estimated size 244.3 KiB, free 364.0 MiB) | |
23/05/17 16:15:05 INFO BlockManagerInfo: Added broadcast_24_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 244.3 KiB, free: 365.8 MiB) | |
23/05/17 16:15:05 INFO SparkContext: Created broadcast 24 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:15:05 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 42 (MapPartitionsRDD[76] at map at HoodieJavaRDD.java:111) (first 15 tasks are for partitions Vector(0)) | |
23/05/17 16:15:05 INFO TaskSchedulerImpl: Adding task set 42.0 with 1 tasks resource profile 0 | |
23/05/17 16:15:05 INFO TaskSetManager: Starting task 0.0 in stage 42.0 (TID 32) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
23/05/17 16:15:05 INFO Executor: Running task 0.0 in stage 42.0 (TID 32) | |
23/05/17 16:15:05 INFO BlockManager: Found block rdd_74_0 locally | |
23/05/17 16:15:05 INFO Executor: Finished task 0.0 in stage 42.0 (TID 32). 1136 bytes result sent to driver | |
23/05/17 16:15:05 INFO TaskSetManager: Finished task 0.0 in stage 42.0 (TID 32) in 23 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
23/05/17 16:15:05 INFO TaskSchedulerImpl: Removed TaskSet 42.0, whose tasks have all completed, from pool | |
23/05/17 16:15:05 INFO DAGScheduler: ResultStage 42 (collect at HoodieJavaRDD.java:163) finished in 0.073 s | |
23/05/17 16:15:05 INFO DAGScheduler: Job 19 is finished. Cancelling potential speculative or zombie tasks for this job | |
23/05/17 16:15:05 INFO TaskSchedulerImpl: Killing all running tasks in stage 42: Stage finished | |
23/05/17 16:15:05 INFO DAGScheduler: Job 19 finished: collect at HoodieJavaRDD.java:163, took 0.077450 s | |
23/05/17 16:15:05 INFO BaseSparkCommitActionExecutor: Committing 20230517161454289, action Type deltacommit, operation Type UPSERT_PREPPED | |
23/05/17 16:15:05 INFO SparkContext: Starting job: collect at HoodieSparkEngineContext.java:137 | |
23/05/17 16:15:05 INFO DAGScheduler: Got job 20 (collect at HoodieSparkEngineContext.java:137) with 1 output partitions | |
23/05/17 16:15:05 INFO DAGScheduler: Final stage: ResultStage 43 (collect at HoodieSparkEngineContext.java:137) | |
23/05/17 16:15:05 INFO DAGScheduler: Parents of final stage: List() | |
23/05/17 16:15:05 INFO DAGScheduler: Missing parents: List() | |
23/05/17 16:15:05 INFO DAGScheduler: Submitting ResultStage 43 (MapPartitionsRDD[78] at flatMap at HoodieSparkEngineContext.java:137), which has no missing parents | |
23/05/17 16:15:05 INFO MemoryStore: Block broadcast_25 stored as values in memory (estimated size 149.6 KiB, free 363.9 MiB) | |
23/05/17 16:15:05 INFO MemoryStore: Block broadcast_25_piece0 stored as bytes in memory (estimated size 56.8 KiB, free 363.8 MiB) | |
23/05/17 16:15:05 INFO BlockManagerInfo: Added broadcast_25_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 56.8 KiB, free: 365.7 MiB) | |
23/05/17 16:15:05 INFO SparkContext: Created broadcast 25 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:15:05 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 43 (MapPartitionsRDD[78] at flatMap at HoodieSparkEngineContext.java:137) (first 15 tasks are for partitions Vector(0)) | |
23/05/17 16:15:05 INFO TaskSchedulerImpl: Adding task set 43.0 with 1 tasks resource profile 0 | |
23/05/17 16:15:05 INFO TaskSetManager: Starting task 0.0 in stage 43.0 (TID 33) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4489 bytes) taskResourceAssignments Map() | |
23/05/17 16:15:05 INFO Executor: Running task 0.0 in stage 43.0 (TID 33) | |
23/05/17 16:15:05 INFO Executor: Finished task 0.0 in stage 43.0 (TID 33). 805 bytes result sent to driver | |
23/05/17 16:15:05 INFO TaskSetManager: Finished task 0.0 in stage 43.0 (TID 33) in 13 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
23/05/17 16:15:05 INFO TaskSchedulerImpl: Removed TaskSet 43.0, whose tasks have all completed, from pool | |
23/05/17 16:15:05 INFO DAGScheduler: ResultStage 43 (collect at HoodieSparkEngineContext.java:137) finished in 0.035 s | |
23/05/17 16:15:05 INFO DAGScheduler: Job 20 is finished. Cancelling potential speculative or zombie tasks for this job | |
23/05/17 16:15:05 INFO TaskSchedulerImpl: Killing all running tasks in stage 43: Stage finished | |
23/05/17 16:15:05 INFO DAGScheduler: Job 20 finished: collect at HoodieSparkEngineContext.java:137, took 0.038779 s | |
23/05/17 16:15:05 INFO HoodieActiveTimeline: Marking instant complete [==>20230517161454289__deltacommit__INFLIGHT] | |
23/05/17 16:15:05 INFO HoodieActiveTimeline: Checking for file exists ?/tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/20230517161454289.deltacommit.inflight | |
23/05/17 16:15:05 INFO HoodieActiveTimeline: Create new file for toInstant ?/tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/20230517161454289.deltacommit | |
23/05/17 16:15:05 INFO HoodieActiveTimeline: Completed [==>20230517161454289__deltacommit__INFLIGHT] | |
23/05/17 16:15:05 INFO BaseSparkCommitActionExecutor: Committed 20230517161454289 | |
23/05/17 16:15:05 INFO SparkContext: Starting job: collectAsMap at HoodieSparkEngineContext.java:151 | |
23/05/17 16:15:05 INFO DAGScheduler: Got job 21 (collectAsMap at HoodieSparkEngineContext.java:151) with 1 output partitions | |
23/05/17 16:15:05 INFO DAGScheduler: Final stage: ResultStage 44 (collectAsMap at HoodieSparkEngineContext.java:151) | |
23/05/17 16:15:05 INFO DAGScheduler: Parents of final stage: List() | |
23/05/17 16:15:05 INFO DAGScheduler: Missing parents: List() | |
23/05/17 16:15:05 INFO DAGScheduler: Submitting ResultStage 44 (MapPartitionsRDD[80] at mapToPair at HoodieSparkEngineContext.java:148), which has no missing parents | |
23/05/17 16:15:05 INFO MemoryStore: Block broadcast_26 stored as values in memory (estimated size 149.8 KiB, free 363.6 MiB) | |
23/05/17 16:15:05 INFO MemoryStore: Block broadcast_26_piece0 stored as bytes in memory (estimated size 56.9 KiB, free 363.6 MiB) | |
23/05/17 16:15:05 INFO BlockManagerInfo: Added broadcast_26_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 56.9 KiB, free: 365.6 MiB) | |
23/05/17 16:15:05 INFO SparkContext: Created broadcast 26 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:15:05 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 44 (MapPartitionsRDD[80] at mapToPair at HoodieSparkEngineContext.java:148) (first 15 tasks are for partitions Vector(0)) | |
23/05/17 16:15:05 INFO TaskSchedulerImpl: Adding task set 44.0 with 1 tasks resource profile 0 | |
23/05/17 16:15:05 INFO TaskSetManager: Starting task 0.0 in stage 44.0 (TID 34) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4489 bytes) taskResourceAssignments Map() | |
23/05/17 16:15:05 INFO Executor: Running task 0.0 in stage 44.0 (TID 34) | |
23/05/17 16:15:05 INFO Executor: Finished task 0.0 in stage 44.0 (TID 34). 966 bytes result sent to driver | |
23/05/17 16:15:05 INFO TaskSetManager: Finished task 0.0 in stage 44.0 (TID 34) in 11 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
23/05/17 16:15:05 INFO TaskSchedulerImpl: Removed TaskSet 44.0, whose tasks have all completed, from pool | |
23/05/17 16:15:05 INFO DAGScheduler: ResultStage 44 (collectAsMap at HoodieSparkEngineContext.java:151) finished in 0.030 s | |
23/05/17 16:15:05 INFO DAGScheduler: Job 21 is finished. Cancelling potential speculative or zombie tasks for this job | |
23/05/17 16:15:05 INFO TaskSchedulerImpl: Killing all running tasks in stage 44: Stage finished | |
23/05/17 16:15:05 INFO DAGScheduler: Job 21 finished: collectAsMap at HoodieSparkEngineContext.java:151, took 0.031799 s | |
23/05/17 16:15:05 INFO FSUtils: Removed directory at /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/.temp/20230517161454289 | |
23/05/17 16:15:05 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:05 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:15:05 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:05 INFO HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:05 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__deltacommit__COMPLETED]} | |
23/05/17 16:15:05 INFO FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
23/05/17 16:15:05 INFO FileSystemViewManager: Creating in-memory based Table View | |
23/05/17 16:15:05 INFO SparkContext: Starting job: collect at SparkHoodieBackedTableMetadataWriter.java:185 | |
23/05/17 16:15:05 INFO DAGScheduler: Got job 22 (collect at SparkHoodieBackedTableMetadataWriter.java:185) with 1 output partitions | |
23/05/17 16:15:05 INFO DAGScheduler: Final stage: ResultStage 46 (collect at SparkHoodieBackedTableMetadataWriter.java:185) | |
23/05/17 16:15:05 INFO DAGScheduler: Parents of final stage: List(ShuffleMapStage 45) | |
23/05/17 16:15:05 INFO DAGScheduler: Missing parents: List() | |
23/05/17 16:15:05 INFO DAGScheduler: Submitting ResultStage 46 (MapPartitionsRDD[74] at flatMap at BaseSparkCommitActionExecutor.java:255), which has no missing parents | |
23/05/17 16:15:05 INFO MemoryStore: Block broadcast_27 stored as values in memory (estimated size 648.9 KiB, free 363.0 MiB) | |
23/05/17 16:15:05 INFO MemoryStore: Block broadcast_27_piece0 stored as bytes in memory (estimated size 244.3 KiB, free 362.7 MiB) | |
23/05/17 16:15:05 INFO BlockManagerInfo: Added broadcast_27_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 244.3 KiB, free: 365.4 MiB) | |
23/05/17 16:15:05 INFO SparkContext: Created broadcast 27 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:15:05 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 46 (MapPartitionsRDD[74] at flatMap at BaseSparkCommitActionExecutor.java:255) (first 15 tasks are for partitions Vector(0)) | |
23/05/17 16:15:05 INFO TaskSchedulerImpl: Adding task set 46.0 with 1 tasks resource profile 0 | |
23/05/17 16:15:05 INFO TaskSetManager: Starting task 0.0 in stage 46.0 (TID 35) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4271 bytes) taskResourceAssignments Map() | |
23/05/17 16:15:05 INFO Executor: Running task 0.0 in stage 46.0 (TID 35) | |
23/05/17 16:15:05 INFO BlockManager: Found block rdd_74_0 locally | |
23/05/17 16:15:05 INFO Executor: Finished task 0.0 in stage 46.0 (TID 35). 1232 bytes result sent to driver | |
23/05/17 16:15:05 INFO TaskSetManager: Finished task 0.0 in stage 46.0 (TID 35) in 32 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
23/05/17 16:15:05 INFO TaskSchedulerImpl: Removed TaskSet 46.0, whose tasks have all completed, from pool | |
23/05/17 16:15:05 INFO DAGScheduler: ResultStage 46 (collect at SparkHoodieBackedTableMetadataWriter.java:185) finished in 0.080 s | |
23/05/17 16:15:05 INFO DAGScheduler: Job 22 is finished. Cancelling potential speculative or zombie tasks for this job | |
23/05/17 16:15:05 INFO TaskSchedulerImpl: Killing all running tasks in stage 46: Stage finished | |
23/05/17 16:15:05 INFO DAGScheduler: Job 22 finished: collect at SparkHoodieBackedTableMetadataWriter.java:185, took 0.083258 s | |
23/05/17 16:15:05 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__deltacommit__COMPLETED]} | |
23/05/17 16:15:05 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__deltacommit__COMPLETED]} | |
23/05/17 16:15:05 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:05 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:15:05 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:05 INFO HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:05 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__deltacommit__COMPLETED]} | |
23/05/17 16:15:05 INFO FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
23/05/17 16:15:05 INFO FileSystemViewManager: Creating in-memory based Table View | |
23/05/17 16:15:05 INFO BaseHoodieWriteClient: Cleaner started | |
23/05/17 16:15:05 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:05 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:15:05 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:05 INFO HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:05 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__deltacommit__COMPLETED]} | |
23/05/17 16:15:05 INFO FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
23/05/17 16:15:05 INFO FileSystemViewManager: Creating in-memory based Table View | |
23/05/17 16:15:05 INFO BaseHoodieWriteClient: Scheduling cleaning at instant time :20230517161454289002 | |
23/05/17 16:15:05 INFO FileSystemViewManager: Creating InMemory based view for basePath /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:05 INFO AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:15:05 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:15:05 INFO CleanPlanner: No earliest commit to retain. No need to scan partitions !! | |
23/05/17 16:15:05 INFO CleanPlanner: Nothing to clean here. It is already clean | |
23/05/17 16:15:05 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__deltacommit__COMPLETED]} | |
23/05/17 16:15:05 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:05 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:15:05 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:05 INFO HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:05 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__deltacommit__COMPLETED]} | |
23/05/17 16:15:05 INFO FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
23/05/17 16:15:05 INFO FileSystemViewManager: Creating in-memory based Table View | |
23/05/17 16:15:05 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:05 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:15:05 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:05 INFO HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:05 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__deltacommit__COMPLETED]} | |
23/05/17 16:15:05 INFO FileSystemViewManager: Creating View Manager with storage type :MEMORY | |
23/05/17 16:15:05 INFO FileSystemViewManager: Creating in-memory based Table View | |
23/05/17 16:15:05 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__deltacommit__COMPLETED]} | |
23/05/17 16:15:05 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:05 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:15:05 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:05 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[==>20230517161454289__commit__INFLIGHT]} | |
23/05/17 16:15:05 INFO HoodieTimelineArchiver: No Instants to archive | |
23/05/17 16:15:05 INFO HoodieActiveTimeline: Marking instant complete [==>20230517161454289__commit__INFLIGHT] | |
23/05/17 16:15:05 INFO HoodieActiveTimeline: Checking for file exists ?/tmp/deltastreamertest/stocks20230517t161102/.hoodie/20230517161454289.inflight | |
23/05/17 16:15:06 INFO HoodieActiveTimeline: Create new file for toInstant ?/tmp/deltastreamertest/stocks20230517t161102/.hoodie/20230517161454289.commit | |
23/05/17 16:15:06 INFO HoodieActiveTimeline: Completed [==>20230517161454289__commit__INFLIGHT] | |
23/05/17 16:15:06 WARN WriteMarkersFactory: Timeline-server-based markers are not supported for HDFS: base path /tmp/deltastreamertest/stocks20230517t161102. Falling back to direct markers. | |
23/05/17 16:15:06 INFO SparkContext: Starting job: collectAsMap at HoodieSparkEngineContext.java:151 | |
23/05/17 16:15:06 INFO DAGScheduler: Got job 23 (collectAsMap at HoodieSparkEngineContext.java:151) with 1 output partitions | |
23/05/17 16:15:06 INFO DAGScheduler: Final stage: ResultStage 47 (collectAsMap at HoodieSparkEngineContext.java:151) | |
23/05/17 16:15:06 INFO DAGScheduler: Parents of final stage: List() | |
23/05/17 16:15:06 INFO DAGScheduler: Missing parents: List() | |
23/05/17 16:15:06 INFO DAGScheduler: Submitting ResultStage 47 (MapPartitionsRDD[82] at mapToPair at HoodieSparkEngineContext.java:148), which has no missing parents | |
23/05/17 16:15:06 INFO MemoryStore: Block broadcast_28 stored as values in memory (estimated size 149.8 KiB, free 362.6 MiB) | |
23/05/17 16:15:06 INFO MemoryStore: Block broadcast_28_piece0 stored as bytes in memory (estimated size 56.9 KiB, free 362.5 MiB) | |
23/05/17 16:15:06 INFO BlockManagerInfo: Added broadcast_28_piece0 in memory on ip-172-31-19-77.us-east-2.compute.internal:35347 (size: 56.9 KiB, free: 365.4 MiB) | |
23/05/17 16:15:06 INFO SparkContext: Created broadcast 28 from broadcast at DAGScheduler.scala:1513 | |
23/05/17 16:15:06 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 47 (MapPartitionsRDD[82] at mapToPair at HoodieSparkEngineContext.java:148) (first 15 tasks are for partitions Vector(0)) | |
23/05/17 16:15:06 INFO TaskSchedulerImpl: Adding task set 47.0 with 1 tasks resource profile 0 | |
23/05/17 16:15:06 INFO TaskSetManager: Starting task 0.0 in stage 47.0 (TID 36) (ip-172-31-19-77.us-east-2.compute.internal, executor driver, partition 0, PROCESS_LOCAL, 4471 bytes) taskResourceAssignments Map() | |
23/05/17 16:15:06 INFO Executor: Running task 0.0 in stage 47.0 (TID 36) | |
23/05/17 16:15:06 INFO Executor: Finished task 0.0 in stage 47.0 (TID 36). 948 bytes result sent to driver | |
23/05/17 16:15:06 INFO TaskSetManager: Finished task 0.0 in stage 47.0 (TID 36) in 12 ms on ip-172-31-19-77.us-east-2.compute.internal (executor driver) (1/1) | |
23/05/17 16:15:06 INFO TaskSchedulerImpl: Removed TaskSet 47.0, whose tasks have all completed, from pool | |
23/05/17 16:15:06 INFO DAGScheduler: ResultStage 47 (collectAsMap at HoodieSparkEngineContext.java:151) finished in 0.032 s | |
23/05/17 16:15:06 INFO DAGScheduler: Job 23 is finished. Cancelling potential speculative or zombie tasks for this job | |
23/05/17 16:15:06 INFO TaskSchedulerImpl: Killing all running tasks in stage 47: Stage finished | |
23/05/17 16:15:06 INFO DAGScheduler: Job 23 finished: collectAsMap at HoodieSparkEngineContext.java:151, took 0.035005 s | |
23/05/17 16:15:06 INFO FSUtils: Removed directory at /tmp/deltastreamertest/stocks20230517t161102/.hoodie/.temp/20230517161454289 | |
23/05/17 16:15:06 INFO BaseHoodieWriteClient: Committed 20230517161454289 | |
23/05/17 16:15:06 INFO MapPartitionsRDD: Removing RDD 74 from persistence list | |
23/05/17 16:15:06 INFO MapPartitionsRDD: Removing RDD 53 from persistence list | |
23/05/17 16:15:06 INFO UnionRDD: Removing RDD 64 from persistence list | |
23/05/17 16:15:06 INFO MapPartitionsRDD: Removing RDD 43 from persistence list | |
23/05/17 16:15:06 INFO BaseHoodieWriteClient: Start to clean synchronously. | |
23/05/17 16:15:06 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:06 INFO BlockManager: Removing RDD 74 | |
23/05/17 16:15:06 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:15:06 INFO BlockManager: Removing RDD 53 | |
23/05/17 16:15:06 INFO BlockManager: Removing RDD 64 | |
23/05/17 16:15:06 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:06 INFO HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:06 INFO BlockManager: Removing RDD 43 | |
23/05/17 16:15:06 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__commit__COMPLETED]} | |
23/05/17 16:15:06 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:06 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:15:06 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:06 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:06 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:15:06 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:06 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__deltacommit__COMPLETED]} | |
23/05/17 16:15:06 INFO AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:15:06 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:15:06 INFO FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
23/05/17 16:15:06 INFO FileSystemViewManager: Creating remote first table view | |
23/05/17 16:15:06 INFO BaseHoodieWriteClient: Cleaner started | |
23/05/17 16:15:06 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:06 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:15:06 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:06 INFO HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:06 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__commit__COMPLETED]} | |
23/05/17 16:15:06 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:06 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:15:06 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:06 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:06 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:15:06 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:06 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__deltacommit__COMPLETED]} | |
23/05/17 16:15:06 INFO AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:15:06 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:15:06 INFO FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
23/05/17 16:15:06 INFO FileSystemViewManager: Creating remote first table view | |
23/05/17 16:15:06 INFO BaseHoodieWriteClient: Scheduling cleaning at instant time :20230517161506449 | |
23/05/17 16:15:06 INFO FileSystemViewManager: Creating remote view for basePath /tmp/deltastreamertest/stocks20230517t161102. Server=ip-172-31-19-77.us-east-2.compute.internal:36617, Timeout=300 | |
23/05/17 16:15:06 INFO FileSystemViewManager: Creating InMemory based view for basePath /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:06 INFO AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:15:06 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:15:06 INFO RemoteHoodieTableFileSystemView: Sending request : (http://ip-172-31-19-77.us-east-2.compute.internal:36617/v1/hoodie/view/compactions/pending/?basepath=%2Ftmp%2Fdeltastreamertest%2Fstocks20230517t161102&lastinstantts=20230517161454289&timelinehash=5b38d5fd66447f3cd84c5cead3df096c232da29daf23f9133ba4a780ab25dc01) | |
23/05/17 16:15:06 INFO BlockManagerInfo: Removed broadcast_23_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 244.3 KiB, free: 365.6 MiB) | |
23/05/17 16:15:06 INFO BlockManagerInfo: Removed broadcast_26_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 56.9 KiB, free: 365.7 MiB) | |
23/05/17 16:15:06 INFO BlockManagerInfo: Removed broadcast_25_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 56.8 KiB, free: 365.7 MiB) | |
23/05/17 16:15:06 INFO BlockManager: Removing RDD 64 | |
23/05/17 16:15:06 INFO BlockManagerInfo: Removed broadcast_28_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 56.9 KiB, free: 365.8 MiB) | |
23/05/17 16:15:06 INFO BlockManager: Removing RDD 74 | |
23/05/17 16:15:06 INFO BlockManagerInfo: Removed broadcast_27_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 244.3 KiB, free: 366.0 MiB) | |
23/05/17 16:15:06 INFO BlockManagerInfo: Removed broadcast_24_piece0 on ip-172-31-19-77.us-east-2.compute.internal:35347 in memory (size: 244.3 KiB, free: 366.2 MiB) | |
23/05/17 16:15:07 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:07 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:15:07 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:07 INFO FileSystemViewManager: Creating InMemory based view for basePath /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:07 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__commit__COMPLETED]} | |
23/05/17 16:15:07 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:07 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:15:07 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:07 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:07 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:15:07 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:07 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__deltacommit__COMPLETED]} | |
23/05/17 16:15:07 INFO AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:15:07 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:15:07 INFO AbstractTableFileSystemView: Took 1 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:15:07 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:15:07 INFO RemoteHoodieTableFileSystemView: Sending request : (http://ip-172-31-19-77.us-east-2.compute.internal:36617/v1/hoodie/view/logcompactions/pending/?basepath=%2Ftmp%2Fdeltastreamertest%2Fstocks20230517t161102&lastinstantts=20230517161454289&timelinehash=5b38d5fd66447f3cd84c5cead3df096c232da29daf23f9133ba4a780ab25dc01) | |
23/05/17 16:15:07 INFO CleanPlanner: No earliest commit to retain. No need to scan partitions !! | |
23/05/17 16:15:07 INFO CleanPlanner: Nothing to clean here. It is already clean | |
23/05/17 16:15:07 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__commit__COMPLETED]} | |
23/05/17 16:15:07 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:07 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:15:07 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:07 INFO HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:07 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__commit__COMPLETED]} | |
23/05/17 16:15:07 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:07 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:15:07 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:07 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:07 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:15:07 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:07 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__deltacommit__COMPLETED]} | |
23/05/17 16:15:07 INFO AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:15:07 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:15:07 INFO FileSystemViewManager: Creating View Manager with storage type :REMOTE_FIRST | |
23/05/17 16:15:07 INFO FileSystemViewManager: Creating remote first table view | |
23/05/17 16:15:07 INFO BaseHoodieWriteClient: Start to archive synchronously. | |
23/05/17 16:15:07 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__commit__COMPLETED]} | |
23/05/17 16:15:07 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:07 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:15:07 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:07 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:07 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:15:07 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:07 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__deltacommit__COMPLETED]} | |
23/05/17 16:15:07 INFO AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:15:07 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:15:07 INFO HoodieTimelineArchiver: Not archiving as there is no compaction yet on the metadata table | |
23/05/17 16:15:07 INFO HoodieTimelineArchiver: No Instants to archive | |
23/05/17 16:15:07 INFO FileSystemViewManager: Creating remote view for basePath /tmp/deltastreamertest/stocks20230517t161102. Server=ip-172-31-19-77.us-east-2.compute.internal:36617, Timeout=300 | |
23/05/17 16:15:07 INFO FileSystemViewManager: Creating InMemory based view for basePath /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:07 INFO AbstractTableFileSystemView: Took 1 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:15:07 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:15:07 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__commit__COMPLETED]} | |
23/05/17 16:15:07 INFO RemoteHoodieTableFileSystemView: Sending request : (http://ip-172-31-19-77.us-east-2.compute.internal:36617/v1/hoodie/view/refresh/?basepath=%2Ftmp%2Fdeltastreamertest%2Fstocks20230517t161102&lastinstantts=20230517161454289&timelinehash=5b38d5fd66447f3cd84c5cead3df096c232da29daf23f9133ba4a780ab25dc01) | |
23/05/17 16:15:07 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__commit__COMPLETED]} | |
23/05/17 16:15:07 INFO AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:15:07 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:15:07 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__commit__COMPLETED]} | |
23/05/17 16:15:07 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__deltacommit__COMPLETED]} | |
23/05/17 16:15:07 INFO AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:15:07 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:15:07 INFO DeltaSync: Commit 20230517161454289 successful! | |
23/05/17 16:15:07 INFO HiveConf: Found configuration file file:/home/hadoop/spark-3.3.2-bin-hadoop3/conf/hive-site.xml | |
23/05/17 16:15:07 WARN HiveConf: HiveConf of name hive.server2.thrift.url does not exist | |
23/05/17 16:15:07 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:07 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:15:07 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:07 INFO HoodieTableMetaClient: Loading Active commit timeline for /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:07 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__commit__COMPLETED]} | |
23/05/17 16:15:07 INFO metastore: Trying to connect to metastore with URI thrift://ip-172-31-19-77.us-east-2.compute.internal:9083 | |
23/05/17 16:15:07 INFO metastore: Opened a connection to metastore, current connections: 1 | |
23/05/17 16:15:07 INFO metastore: Connected to metastore. | |
23/05/17 16:15:07 INFO HiveSyncTool: Syncing target hoodie table with hive table(default.stocks20230517t161102). Hive metastore URL from HiveConf:thrift://ip-172-31-19-77.us-east-2.compute.internal:9083). Hive metastore URL from HiveSyncConfig:null, basePath :/tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:07 INFO HiveSyncTool: Trying to sync hoodie table stocks20230517t161102 with base path /tmp/deltastreamertest/stocks20230517t161102 of type COPY_ON_WRITE | |
23/05/17 16:15:07 INFO TableSchemaResolver: Reading schema from /tmp/deltastreamertest/stocks20230517t161102/2018/08/31/7f63e431-8db7-4efa-9b96-5fee5648f6ed-0_0-22-23_20230517161454289.parquet | |
23/05/17 16:15:07 INFO HiveSyncTool: Hive table stocks20230517t161102 is not found. Creating it with schema message stock_ticks { | |
optional binary _hoodie_commit_time (STRING); | |
optional binary _hoodie_commit_seqno (STRING); | |
optional binary _hoodie_record_key (STRING); | |
optional binary _hoodie_partition_path (STRING); | |
optional binary _hoodie_file_name (STRING); | |
required int64 volume; | |
required binary ts (STRING); | |
required binary symbol (STRING); | |
required int32 year; | |
required binary month (STRING); | |
required double high; | |
required double low; | |
required binary key (STRING); | |
required binary date (STRING); | |
required double close; | |
required double open; | |
required binary day (STRING); | |
} | |
23/05/17 16:15:08 INFO HoodieHiveSyncClient: No comment difference of stocks20230517t161102 | |
23/05/17 16:15:08 INFO HiveSyncTool: Schema sync complete. Syncing partitions for stocks20230517t161102 | |
23/05/17 16:15:08 INFO HiveSyncTool: Last commit time synced was found to be null | |
23/05/17 16:15:08 INFO HiveSyncTool: Sync all partitions given the last commit time synced is empty or before the start of the active timeline. Listing all partitions in /tmp/deltastreamertest/stocks20230517t161102, file system: DFS[DFSClient[clientName=DFSClient_NONMAPREDUCE_-865241332_1, ugi=hadoop (auth:SIMPLE)]] | |
23/05/17 16:15:08 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:08 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/hoodie.properties | |
23/05/17 16:15:08 INFO HoodieTableMetaClient: Finished Loading Table of type COPY_ON_WRITE(version=1, baseFileFormat=PARQUET) from /tmp/deltastreamertest/stocks20230517t161102 | |
23/05/17 16:15:08 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:08 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:15:08 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:08 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__deltacommit__COMPLETED]} | |
23/05/17 16:15:08 INFO AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:15:08 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:15:08 INFO HoodieTableMetadataUtil: Loading latest merged file slices for metadata table partition files | |
23/05/17 16:15:08 INFO AbstractTableFileSystemView: Took 0 ms to read 0 instants, 0 replaced file groups | |
23/05/17 16:15:08 INFO ClusteringUtils: Found 0 files in pending clustering operations | |
23/05/17 16:15:08 INFO AbstractTableFileSystemView: Building file system view for partition (files) | |
23/05/17 16:15:08 INFO AbstractTableFileSystemView: addFilesToView: NumFiles=2, NumFileGroups=1, FileGroupsCreationTime=1, StoreTimeTaken=0 | |
23/05/17 16:15:08 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__commit__COMPLETED]} | |
23/05/17 16:15:08 INFO HoodieTableMetaClient: Loading HoodieTableMetaClient from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:08 INFO HoodieTableConfig: Loading table properties from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/.hoodie/hoodie.properties | |
23/05/17 16:15:08 INFO HoodieTableMetaClient: Finished Loading Table of type MERGE_ON_READ(version=1, baseFileFormat=HFILE) from /tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata | |
23/05/17 16:15:08 INFO HoodieActiveTimeline: Loaded instants upto : Option{val=[20230517161454289__deltacommit__COMPLETED]} | |
23/05/17 16:15:08 INFO AbstractHoodieLogRecordReader: Scanning log file HoodieLogFile{pathStr='hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0', fileLen=-1} | |
23/05/17 16:15:08 INFO AbstractHoodieLogRecordReader: Reading a delete block from file hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0 | |
23/05/17 16:15:08 INFO AbstractHoodieLogRecordReader: Scanning log file HoodieLogFile{pathStr='hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0', fileLen=-1} | |
23/05/17 16:15:08 INFO AbstractHoodieLogRecordReader: Reading a data block from file hdfs://ip-172-31-19-77.us-east-2.compute.internal:8020/tmp/deltastreamertest/stocks20230517t161102/.hoodie/metadata/files/.files-0000_00000000000000.log.1_0-0-0 at instant 20230517161454289 | |
23/05/17 16:15:08 INFO AbstractHoodieLogRecordReader: Merging the final data blocks | |
23/05/17 16:15:08 INFO AbstractHoodieLogRecordReader: Number of remaining logblocks to merge 2 | |
23/05/17 16:15:08 INFO AbstractHoodieLogRecordReader: Number of remaining logblocks to merge 1 | |
23/05/17 16:15:08 INFO CodecPool: Got brand-new decompressor [.gz] | |
23/05/17 16:15:08 INFO CodecPool: Got brand-new decompressor [.gz] | |
23/05/17 16:15:08 INFO CodecPool: Got brand-new decompressor [.gz] | |
23/05/17 16:15:08 INFO CodecPool: Got brand-new decompressor [.gz] | |
23/05/17 16:15:08 INFO HoodieMergedLogRecordScanner: Number of log files scanned => 1 | |
23/05/17 16:15:08 INFO HoodieMergedLogRecordScanner: MaxMemoryInBytes allowed for compaction => 1073741824 | |
23/05/17 16:15:08 INFO HoodieMergedLogRecordScanner: Number of entries in MemoryBasedMap in ExternalSpillableMap => 2 | |
23/05/17 16:15:08 INFO HoodieMergedLogRecordScanner: Total size in bytes of MemoryBasedMap in ExternalSpillableMap => 1248 | |
23/05/17 16:15:08 INFO HoodieMergedLogRecordScanner: Number of entries in BitCaskDiskMap in ExternalSpillableMap => 0 | |
23/05/17 16:15:08 INFO HoodieMergedLogRecordScanner: Size of file spilled to disk => 0 | |
23/05/17 16:15:08 INFO HoodieBackedTableMetadata: Opened 1 metadata log files (dataset instant=20230517161454289, metadata instant=20230517161454289) in 237 ms | |
23/05/17 16:15:08 INFO BaseTableMetadata: Listed partitions from metadata: #partitions=1 | |
23/05/17 16:15:08 INFO HiveSyncTool: New Partitions [2018/08/31] | |
23/05/17 16:15:08 INFO HMSDDLExecutor: Adding partitions 1 to table stocks20230517t161102 | |
23/05/17 16:15:09 INFO HMSDDLExecutor: HMSDDLExecutor add a batch partitions done: 1 | |
23/05/17 16:15:09 INFO HiveSyncTool: Sync complete for stocks20230517t161102 | |
23/05/17 16:15:09 INFO metastore: Closed a connection to metastore, current connections: 0 | |
23/05/17 16:15:09 INFO DeltaSync: Shutting down embedded timeline server | |
23/05/17 16:15:09 INFO EmbeddedTimelineService: Closing Timeline server | |
23/05/17 16:15:09 INFO TimelineService: Closing Timeline Service | |
23/05/17 16:15:09 INFO Javalin: Stopping Javalin ... | |
23/05/17 16:15:09 INFO Javalin: Javalin has stopped | |
23/05/17 16:15:09 INFO TimelineService: Closed Timeline Service | |
23/05/17 16:15:09 INFO EmbeddedTimelineService: Closed Timeline server | |
23/05/17 16:15:09 INFO HoodieDeltaStreamer: Shut down delta streamer | |
23/05/17 16:15:09 INFO SparkUI: Stopped Spark web UI at http://ip-172-31-19-77.us-east-2.compute.internal:8090 | |
23/05/17 16:15:09 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped! | |
23/05/17 16:15:09 INFO MemoryStore: MemoryStore cleared | |
23/05/17 16:15:09 INFO BlockManager: BlockManager stopped | |
23/05/17 16:15:09 INFO BlockManagerMaster: BlockManagerMaster stopped | |
23/05/17 16:15:09 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped! | |
23/05/17 16:15:09 INFO SparkContext: Successfully stopped SparkContext | |
23/05/17 16:15:09 INFO ShutdownHookManager: Shutdown hook called | |
23/05/17 16:15:09 INFO ShutdownHookManager: Deleting directory /mnt/tmp/spark-997c8508-ad8f-4762-8b4e-7fc5dbf34d55 | |
23/05/17 16:15:09 INFO ShutdownHookManager: Deleting directory /mnt/tmp/spark-8a4988c1-389e-45a2-b3de-88bd96f1eba1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment