Created
May 26, 2024 12:40
-
-
Save timsaucer/7527c0851b379d4e9c466d8972d49a01 to your computer and use it in GitHub Desktop.
This code is a test that programmatically creates a DataFusion DataFrame containing a struct of structs. It is an attempt to reproduce, from Rust, the problem reported against the Python interface at https://github.com/apache/datafusion-python/issues/715. It may also be helpful to anyone who wants to build a struct of structs with `StructBuilder`.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use datafusion::prelude::*; | |
use arrow::{array::{Int32Builder, StructBuilder}, datatypes::*}; | |
#[tokio::main] | |
async fn main() -> datafusion::error::Result<()> { | |
let inner_field1 = Field::new("inner_1", DataType::Int32, true); | |
let inner_field2 = Field::new("inner_2", DataType::Int32, true); | |
let outer_fields: Fields = vec![ | |
inner_field1.clone(), | |
inner_field2.clone(), | |
].into(); | |
let outer_field = Field::new("outer", DataType::Struct(outer_fields.clone()), true); | |
let main_field = Field::new("a", DataType::Struct(vec![outer_field.clone()].into()), false); | |
let inner_builder = StructBuilder::new( | |
vec![ | |
inner_field1.clone(), | |
inner_field2.clone(), | |
], | |
vec![ | |
Box::new(Int32Builder::new()), | |
Box::new(Int32Builder::new()), | |
], | |
); | |
let outer_builder = StructBuilder::new( | |
vec![outer_field], | |
vec![Box::new(inner_builder)] | |
); | |
let mut main_builder = StructBuilder::new( | |
vec![main_field], | |
vec![Box::new(outer_builder)], | |
); | |
// Row 1: {outer: {inner_1: 1, inner_2: 2}} | |
main_builder | |
.field_builder::<StructBuilder>(0).unwrap() // main | |
.field_builder::<StructBuilder>(0).unwrap() // outer | |
.field_builder::<Int32Builder>(0).unwrap() // inner_1 | |
.append_value(1); | |
main_builder | |
.field_builder::<StructBuilder>(0).unwrap() // main | |
.field_builder::<StructBuilder>(0).unwrap() // outer | |
.field_builder::<Int32Builder>(1).unwrap() // inner_1 | |
.append_value(2); | |
main_builder | |
.field_builder::<StructBuilder>(0).unwrap() // main | |
.field_builder::<StructBuilder>(0).unwrap() // outer | |
.append(true); | |
main_builder | |
.field_builder::<StructBuilder>(0).unwrap() // main | |
.append(true); | |
main_builder.append(true); | |
// Row 2: {outer: {inner_1: 3, inner_2: null}} | |
main_builder | |
.field_builder::<StructBuilder>(0).unwrap() // main | |
.field_builder::<StructBuilder>(0).unwrap() // outer | |
.field_builder::<Int32Builder>(0).unwrap() // inner_1 | |
.append_value(3); | |
main_builder | |
.field_builder::<StructBuilder>(0).unwrap() // main | |
.field_builder::<StructBuilder>(0).unwrap() // outer | |
.field_builder::<Int32Builder>(1).unwrap() // inner_1 | |
.append_null(); | |
main_builder | |
.field_builder::<StructBuilder>(0).unwrap() // main | |
.field_builder::<StructBuilder>(0).unwrap() // outer | |
.append(true); | |
main_builder | |
.field_builder::<StructBuilder>(0).unwrap() // main | |
.append(true); | |
main_builder.append(true); | |
// Row 3: {outer: null} | |
main_builder | |
.field_builder::<StructBuilder>(0).unwrap() // main | |
.field_builder::<StructBuilder>(0).unwrap() // outer | |
.field_builder::<Int32Builder>(0).unwrap() // inner_1 | |
.append_null(); | |
main_builder | |
.field_builder::<StructBuilder>(0).unwrap() // main | |
.field_builder::<StructBuilder>(0).unwrap() // outer | |
.field_builder::<Int32Builder>(1).unwrap() // inner_1 | |
.append_null(); | |
main_builder | |
.field_builder::<StructBuilder>(0).unwrap() // main | |
.field_builder::<StructBuilder>(0).unwrap() // outer | |
.append_null(); | |
main_builder | |
.field_builder::<StructBuilder>(0).unwrap() // main | |
.append(true); | |
main_builder.append(true); | |
let main_struct = main_builder.finish(); | |
let ctx = SessionContext::default(); | |
ctx.register_batch("table_name", main_struct.into())?; | |
let df = ctx.table("table_name").await?; | |
df.clone().select(vec![col("a")])?.show().await?; | |
df.clone().select(vec![col("a").field("outer")])?.show().await?; | |
df.clone().select(vec![col("a").field("outer").field("inner_2")])?.show().await?; | |
Ok(()) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment