Skip to content

Instantly share code, notes, and snippets.

@paddyhoran
Created August 28, 2019 01:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save paddyhoran/598db6cbb790fc5497320613e54a02c6 to your computer and use it in GitHub Desktop.
Save paddyhoran/598db6cbb790fc5497320613e54a02c6 to your computer and use it in GitHub Desktop.
Datafusion Predicate Pushdown Issue
extern crate arrow;
extern crate datafusion;
use std::sync::Arc;
use arrow::datatypes::{Schema, Field, DataType};
use datafusion::execution::context::ExecutionContext;
/// Runs a filtered SQL query against a CSV file registered with a DataFusion
/// `ExecutionContext` and prints the number of rows in every result batch.
///
/// The CSV file is looked up at `$PARQUET_TEST_DATA/csv/aggregate_test_100.csv`.
/// Despite its name, the `PARQUET_TEST_DATA` environment variable must therefore
/// point at a directory that contains the `csv/` test data.
///
/// # Panics
///
/// Panics if `PARQUET_TEST_DATA` is not set, if the query cannot be turned into
/// a relation, or if fetching a result batch fails.
fn main() {
    let testdata =
        ::std::env::var("PARQUET_TEST_DATA").expect("PARQUET_TEST_DATA not defined");

    let mut ctx = ExecutionContext::new();

    // Explicit schema for the thirteen columns (c1..c13) of the CSV file; every
    // field is declared non-nullable.
    let schema = Arc::new(Schema::new(vec![
        Field::new("c1", DataType::Utf8, false),
        Field::new("c2", DataType::UInt32, false),
        Field::new("c3", DataType::Int8, false),
        Field::new("c4", DataType::Int16, false),
        Field::new("c5", DataType::Int32, false),
        Field::new("c6", DataType::Int64, false),
        Field::new("c7", DataType::UInt8, false),
        Field::new("c8", DataType::UInt16, false),
        Field::new("c9", DataType::UInt32, false),
        Field::new("c10", DataType::UInt64, false),
        Field::new("c11", DataType::Float32, false),
        Field::new("c12", DataType::Float64, false),
        Field::new("c13", DataType::Utf8, false),
    ]));

    // Make the CSV file queryable under the table name used in the SQL below.
    // NOTE(review): the trailing `true` is presumably the "file has a header
    // row" flag -- confirm against the DataFusion version in use.
    ctx.register_csv(
        "aggregate_test_100",
        &format!("{}/csv/aggregate_test_100.csv", testdata),
        &schema,
        true,
    );

    // The projection and the predicate both reference only `c2`.
    let sql = "SELECT c2 FROM aggregate_test_100 WHERE c2 > 5";

    // NOTE(review): the second argument is presumably the batch size -- confirm.
    let relation = ctx
        .sql(sql, 1024 * 1024)
        .expect("failed to create a relation for the SQL query");
    let mut results = relation.borrow_mut();

    // `next()` is fallible and yields `None` once the relation is exhausted, so
    // this cannot be written as a plain `for` loop over an `Iterator`.
    while let Some(batch) = results
        .next()
        .expect("failed to fetch the next record batch")
    {
        println!("{}", batch.num_rows());
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment