Skip to content

Instantly share code, notes, and snippets.

@timsaucer
Created May 26, 2024 12:40
Show Gist options
  • Save timsaucer/7527c0851b379d4e9c466d8972d49a01 to your computer and use it in GitHub Desktop.
Save timsaucer/7527c0851b379d4e9c466d8972d49a01 to your computer and use it in GitHub Desktop.
This code is a test that programmatically creates a DataFusion DataFrame containing a struct of structs. It attempts to reproduce, from Rust, the problem reported against the Python interface at https://github.com/apache/datafusion-python/issues/715. It may also be helpful to anyone who wants to build a struct of structs with StructBuilder.
use datafusion::prelude::*;
use arrow::{array::{Int32Builder, StructBuilder}, datatypes::*};
#[tokio::main]
async fn main() -> datafusion::error::Result<()> {
let inner_field1 = Field::new("inner_1", DataType::Int32, true);
let inner_field2 = Field::new("inner_2", DataType::Int32, true);
let outer_fields: Fields = vec![
inner_field1.clone(),
inner_field2.clone(),
].into();
let outer_field = Field::new("outer", DataType::Struct(outer_fields.clone()), true);
let main_field = Field::new("a", DataType::Struct(vec![outer_field.clone()].into()), false);
let inner_builder = StructBuilder::new(
vec![
inner_field1.clone(),
inner_field2.clone(),
],
vec![
Box::new(Int32Builder::new()),
Box::new(Int32Builder::new()),
],
);
let outer_builder = StructBuilder::new(
vec![outer_field],
vec![Box::new(inner_builder)]
);
let mut main_builder = StructBuilder::new(
vec![main_field],
vec![Box::new(outer_builder)],
);
// Row 1: {outer: {inner_1: 1, inner_2: 2}}
main_builder
.field_builder::<StructBuilder>(0).unwrap() // main
.field_builder::<StructBuilder>(0).unwrap() // outer
.field_builder::<Int32Builder>(0).unwrap() // inner_1
.append_value(1);
main_builder
.field_builder::<StructBuilder>(0).unwrap() // main
.field_builder::<StructBuilder>(0).unwrap() // outer
.field_builder::<Int32Builder>(1).unwrap() // inner_1
.append_value(2);
main_builder
.field_builder::<StructBuilder>(0).unwrap() // main
.field_builder::<StructBuilder>(0).unwrap() // outer
.append(true);
main_builder
.field_builder::<StructBuilder>(0).unwrap() // main
.append(true);
main_builder.append(true);
// Row 2: {outer: {inner_1: 3, inner_2: null}}
main_builder
.field_builder::<StructBuilder>(0).unwrap() // main
.field_builder::<StructBuilder>(0).unwrap() // outer
.field_builder::<Int32Builder>(0).unwrap() // inner_1
.append_value(3);
main_builder
.field_builder::<StructBuilder>(0).unwrap() // main
.field_builder::<StructBuilder>(0).unwrap() // outer
.field_builder::<Int32Builder>(1).unwrap() // inner_1
.append_null();
main_builder
.field_builder::<StructBuilder>(0).unwrap() // main
.field_builder::<StructBuilder>(0).unwrap() // outer
.append(true);
main_builder
.field_builder::<StructBuilder>(0).unwrap() // main
.append(true);
main_builder.append(true);
// Row 3: {outer: null}
main_builder
.field_builder::<StructBuilder>(0).unwrap() // main
.field_builder::<StructBuilder>(0).unwrap() // outer
.field_builder::<Int32Builder>(0).unwrap() // inner_1
.append_null();
main_builder
.field_builder::<StructBuilder>(0).unwrap() // main
.field_builder::<StructBuilder>(0).unwrap() // outer
.field_builder::<Int32Builder>(1).unwrap() // inner_1
.append_null();
main_builder
.field_builder::<StructBuilder>(0).unwrap() // main
.field_builder::<StructBuilder>(0).unwrap() // outer
.append_null();
main_builder
.field_builder::<StructBuilder>(0).unwrap() // main
.append(true);
main_builder.append(true);
let main_struct = main_builder.finish();
let ctx = SessionContext::default();
ctx.register_batch("table_name", main_struct.into())?;
let df = ctx.table("table_name").await?;
df.clone().select(vec![col("a")])?.show().await?;
df.clone().select(vec![col("a").field("outer")])?.show().await?;
df.clone().select(vec![col("a").field("outer").field("inner_2")])?.show().await?;
Ok(())
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment