Last active
February 2, 2023 02:47
-
-
Save roee88/91f2b67c3e180fa0dfb688ba8d923dae to your computer and use it in GitHub Desktop.
Arrow datafusion sample compiled to wasm32-wasi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[package] | |
name = "datafusion_sample" | |
version = "0.1.0" | |
edition = "2021" | |
[lib] | |
crate-type = ["cdylib"] | |
[dependencies] | |
datafusion = { path = "../arrow-datafusion/datafusion", default-features = false } | |
tokio = { version = "1.13", features = ["macros"]} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::sync::Arc; | |
use datafusion::arrow::array::{Int32Array, StringArray}; | |
use datafusion::arrow::datatypes::{DataType, Field, Schema}; | |
use datafusion::arrow::record_batch::RecordBatch; | |
use datafusion::datasource::MemTable; | |
use datafusion::error::Result; | |
use datafusion::prelude::*; | |
#[no_mangle] | |
pub fn _start() -> i32 { | |
let result = run(); | |
match result { | |
Ok(_) => 0, | |
Err(err) => { | |
println!("Error: {}", err); | |
1 | |
}, | |
} | |
} | |
#[tokio::main(flavor = "current_thread")] | |
async fn run() -> Result<()>{ | |
// define a schema. | |
let schema = Arc::new(Schema::new(vec![ | |
Field::new("a", DataType::Utf8, false), | |
Field::new("b", DataType::Int32, false), | |
])); | |
// define data. | |
let batch = RecordBatch::try_new( | |
schema.clone(), | |
vec![ | |
Arc::new(StringArray::from(vec!["a", "b", "c", "d"])), | |
Arc::new(Int32Array::from(vec![1, 10, 10, 100])), | |
], | |
)?; | |
// Register table | |
let mut ctx = ExecutionContext::new(); | |
let provider = MemTable::try_new(schema, vec![vec![batch]])?; | |
ctx.register_table("t", Arc::new(provider))?; | |
// Execute query | |
let df = ctx.sql("SELECT a, b FROM t WHERE b = 10").await?; | |
// Show results | |
// let results = df.collect().await?; | |
df.show().await?; | |
Ok(()) | |
} |
In October 2022, using this gist as a starting point, I compiled DataFusion to wasm32-wasi
and wasm32-unknown-unknown.
Details are in this comment, and the diffs are in this repo.
I haven't updated it since then (~4 months ago), and I'm sure many steps are no longer necessary. I'm not sure when I will work on it next. But if anyone reading this is trying to compile DataFusion to WebAssembly, then these links might help you.
Anyway - I just realized I hadn't commented here with a link to the code, so I wanted to link to it (even if it's ~4 months stale), and also to say thank you for the original gist!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This was tested with apache/datafusion@a8029e5 + the following diff for disabling some parquet features ("lz4" and "zstd"):
Compile with
cargo build --target wasm32-wasi
and run with tools like wasmtime or wasmer.