Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save ad1happy2go/937e1bf894e7de12845bbdc5c845e62a to your computer and use it in GitHub Desktop.
Save ad1happy2go/937e1bf894e7de12845bbdc5c845e62a to your computer and use it in GitHub Desktop.
-- Repro script: Hudi schema evolution on a partitioned copy-on-write table
-- WITH Hive Metastore sync enabled (hms mode).
-- NOTE(review): declared column order is (id, dt, name, price, ts), but since
-- dt is the partition column Spark moves it to the end of the resolved schema,
-- so the positional INSERT ... VALUES tuples below bind as (id, name, price,
-- ts, dt) — the `desc table1` output later in this transcript shows that order.
create table table1 (
id int,
dt string,
name string,
price double,
ts long
) using hudi
tblproperties (
primaryKey = 'id',
type = 'cow',
preCombineField = 'ts',
'hoodie.datasource.hive_sync.mode' = 'hms',
'hoodie.datasource.hive_sync.enable' = 'true'
)
partitioned by (dt)
location '/tmp/schema_evo/table1_1';
-- Seed three rows; values bind positionally as (id, name, price, ts, dt).
insert into table1 values
(1, 'a1', 10, 100, "2021-01-05"),
(2, 'a2', 20, 2000, "2021-01-06"),
(3, 'a3', 30, 3000, "2021-01-07");
-- Schema evolution step: append a new int column to the table.
ALTER TABLE table1 ADD COLUMNS(ext0 int);
-- Intentionally malformed insert: `asfa` is not a valid value tuple; kept
-- on purpose to reproduce the analyzer error shown in the transcript below
-- ("cannot resolve 'asfa' given input columns").
insert into table1 values
(1, 'a1', 10, 100, "2021-01-05"),
(2, 'a2', 20, 2000, "2021-01-06"),
asfa;
-- Six-value insert matching the evolved schema (id, name, price, ts, ext0, dt).
insert into table1 values (5, 'a5', 10, 100,12, "2021-01-05");
-- Expected to fail on this Spark version: the transcript below reports
-- "DROP COLUMN is only supported with v2 tables".
ALTER TABLE table1 DROP COLUMN price;
-------- WITHOUT HIVE SYNC -------------------
-- Repro script: same Hudi schema-evolution sequence as above, but with both
-- meta sync and Hive sync explicitly DISABLED, and a separate table location,
-- to compare behavior with and without metastore synchronization.
-- NOTE(review): declared column order is (id, dt, name, price, ts); dt is the
-- partition column, so the positional INSERT tuples below presumably bind as
-- (id, name, price, ts, dt) — same reordering Spark applies in the synced run.
create table table1 (
id int,
dt string,
name string,
price double,
ts long
) using hudi
tblproperties (
primaryKey = 'id',
type = 'cow',
preCombineField = 'ts',
'hoodie.datasource.meta_sync.enable' = 'false',
'hoodie.datasource.hive_sync.enable' = 'false'
)
partitioned by (dt)
location '/tmp/schema_evo/table1_1_without_sync';
-- Seed three rows; values bind positionally as (id, name, price, ts, dt).
insert into table1 values
(1, 'a1', 10, 100, "2021-01-05"),
(2, 'a2', 20, 2000, "2021-01-06"),
(3, 'a3', 30, 3000, "2021-01-07");
-- Schema evolution step: append a new int column to the table.
ALTER TABLE table1 ADD COLUMNS(ext0 int);
-- Intentionally malformed insert: `asfa` is not a valid value tuple; kept on
-- purpose to exercise the same analyzer error as in the synced run above.
insert into table1 values
(1, 'a1', 10, 100, "2021-01-05"),
(2, 'a2', 20, 2000, "2021-01-06"),
asfa;
-- Six-value insert matching the evolved schema (id, name, price, ts, ext0, dt).
insert into table1 values (5, 'a5', 10, 100,12, "2021-01-05");
-- Expected to fail ("DROP COLUMN is only supported with v2 tables") on this
-- Spark version; kept to confirm the failure is independent of Hive sync.
ALTER TABLE table1 DROP COLUMN price;
-------- WITH HIVE SYNC -------------------
spark-sql> create table table1 (
> id int,
> dt string,
> name string,
> price double,
> ts long
> ) using hudi
> tblproperties (
> primaryKey = 'id',
> type = 'cow',
> preCombineField = 'ts',
> 'hoodie.datasource.hive_sync.mode' = 'hms',
> 'hoodie.datasource.hive_sync.enable' = 'true'
> )
> partitioned by (dt)
> location '/tmp/schema_evo/table1_1';
2023-05-17 08:56:19,384 WARN config.DFSPropertiesConfiguration: Cannot find HUDI_CONF_DIR, please set it as the dir of hudi-defaults.conf
2023-05-17 08:56:19,900 WARN session.SessionState: METASTORE_FILTER_HOOK will be ignored, since hive.security.authorization.manager is set to instance of HiveAuthorizerFactory.
Time taken: 3.214 seconds
spark-sql> insert into table1 values
> (1, 'a1', 10, 100, "2021-01-05"),
> (2, 'a2', 20, 2000, "2021-01-06"),
> (3, 'a3', 30, 3000, "2021-01-07");
2023-05-17 08:56:25,769 WARN metadata.HoodieBackedTableMetadata: Metadata table was not found at path hdfs://ip-172-31-23-26.us-east-2.compute.internal:8020/tmp/schema_evo/table1_1/.hoodie/metadata
2023-05-17 08:56:30,838 WARN marker.WriteMarkersFactory: Timeline-server-based markers are not supported for HDFS: base path hdfs://ip-172-31-23-26.us-east-2.compute.internal:8020/tmp/schema_evo/table1_1. Falling back to direct markers.
2023-05-17 08:56:30,838 WARN marker.WriteMarkersFactory: Timeline-server-based markers are not supported for HDFS: base path hdfs://ip-172-31-23-26.us-east-2.compute.internal:8020/tmp/schema_evo/table1_1. Falling back to direct markers.
2023-05-17 08:56:30,839 WARN marker.WriteMarkersFactory: Timeline-server-based markers are not supported for HDFS: base path hdfs://ip-172-31-23-26.us-east-2.compute.internal:8020/tmp/schema_evo/table1_1. Falling back to direct markers.
2023-05-17 08:56:32,624 WARN marker.WriteMarkersFactory: Timeline-server-based markers are not supported for HDFS: base path hdfs://ip-172-31-23-26.us-east-2.compute.internal:8020/tmp/schema_evo/table1_1. Falling back to direct markers.
# WARNING: Unable to attach Serviceability Agent. Unable to attach even with module exceptions: [org.apache.hudi.org.openjdk.jol.vm.sa.SASupportException: Sense failed., org.apache.hudi.org.openjdk.jol.vm.sa.SASupportException: Sense failed., org.apache.hudi.org.openjdk.jol.vm.sa.SASupportException: Sense failed.]
2023-05-17 08:56:36,623 WARN marker.WriteMarkersFactory: Timeline-server-based markers are not supported for HDFS: base path hdfs://ip-172-31-23-26.us-east-2.compute.internal:8020/tmp/schema_evo/table1_1. Falling back to direct markers.
2023-05-17 08:56:37,757 WARN conf.HiveConf: HiveConf of name hive.server2.thrift.url does not exist
Time taken: 15.171 seconds
spark-sql> ALTER TABLE table1 ADD COLUMNS(ext0 int);
2023-05-17 08:56:52,184 WARN marker.WriteMarkersFactory: Timeline-server-based markers are not supported for HDFS: base path hdfs://ip-172-31-23-26.us-east-2.compute.internal:8020/tmp/schema_evo/table1_1. Falling back to direct markers.
2023-05-17 08:56:55,098 WARN marker.WriteMarkersFactory: Timeline-server-based markers are not supported for HDFS: base path hdfs://ip-172-31-23-26.us-east-2.compute.internal:8020/tmp/schema_evo/table1_1. Falling back to direct markers.
Time taken: 3.72 seconds
spark-sql>
> insert into table1 values
> (1, 'a1', 10, 100, "2021-01-05"),
> (2, 'a2', 20, 2000, "2021-01-06"),
> asfa;
Error in query: cannot resolve 'asfa' given input columns: []; line 4 pos 2;
'InsertIntoStatement 'UnresolvedRelation [table1], [], false, false, false
+- 'UnresolvedInlineTable [col1, col2, col3, col4, col5], [[1, a1, 10, 100, 2021-01-05], [2, a2, 20, 2000, 2021-01-06], ['asfa]]
spark-sql> insert into table1 values (5, 'a5', 10, 100, "2021-01-05", 12);
Error in query: Cannot write incompatible data to table 'default.table1':
- Cannot safely cast 'ext0': string to int
spark-sql> desc table1;
_hoodie_commit_time string
_hoodie_commit_seqno string
_hoodie_record_key string
_hoodie_partition_path string
_hoodie_file_name string
id int
name string
price double
ts bigint
ext0 int
dt string
# Partition Information
# col_name data_type comment
dt string
Time taken: 0.089 seconds, Fetched 14 row(s)
spark-sql>
> insert into table1 values (5, 'a5', 10, 100,12, "2021-01-05");
2023-05-17 08:58:06,430 WARN marker.WriteMarkersFactory: Timeline-server-based markers are not supported for HDFS: base path hdfs://ip-172-31-23-26.us-east-2.compute.internal:8020/tmp/schema_evo/table1_1. Falling back to direct markers.
2023-05-17 08:58:06,968 WARN marker.WriteMarkersFactory: Timeline-server-based markers are not supported for HDFS: base path hdfs://ip-172-31-23-26.us-east-2.compute.internal:8020/tmp/schema_evo/table1_1. Falling back to direct markers.
2023-05-17 08:58:09,746 WARN marker.WriteMarkersFactory: Timeline-server-based markers are not supported for HDFS: base path hdfs://ip-172-31-23-26.us-east-2.compute.internal:8020/tmp/schema_evo/table1_1. Falling back to direct markers.
2023-05-17 08:58:09,886 WARN conf.HiveConf: HiveConf of name hive.server2.thrift.url does not exist
Time taken: 6.053 seconds
spark-sql>
>
> ALTER TABLE table1 DROP COLUMN price;
Error in query: DROP COLUMN is only supported with v2 tables.
select * from table1;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment