Skip to content

Instantly share code, notes, and snippets.

@Isan-Rivkin
Last active November 29, 2019 11:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Isan-Rivkin/235a8b1055bae7431a3c422901ecbf1b to your computer and use it in GitHub Desktop.
Save Isan-Rivkin/235a8b1055bae7431a3c422901ecbf1b to your computer and use it in GitHub Desktop.
Read and parse Parquet in Rust
use std::fs::File;
use std::path::Path;
use parquet::file::reader::{FileReader, SerializedFileReader};
use parquet::record::RowAccessor;
/// Reads the Parquet file at `./result.pq` and prints its rows to stdout.
///
/// For every row the whole record is printed first, followed by columns 0-3
/// read as string, double, int and string respectively. After the last row
/// the number of rows seen is printed as `count = N`.
///
/// # Panics
///
/// Panics if the file cannot be opened, if the Parquet reader or row iterator
/// cannot be created, or if any of columns 0-3 cannot be read with the
/// accessor used for it below.
pub fn pq_from_file() {
    let file = File::open(Path::new("./result.pq")).expect("failed to open ./result.pq");
    let reader = SerializedFileReader::new(file).expect("failed to create Parquet reader");
    // `None` passes no projection schema; presumably this yields every column
    // of the file (confirm against the parquet crate version in use).
    let rows = reader
        .get_row_iter(None)
        .expect("failed to create row iterator");

    let mut row_count = 0;
    for record in rows {
        row_count += 1;
        println!("{}", record);
        println!("0: {}", record.get_string(0).expect("column 0 is not a string"));
        println!("1: {}", record.get_double(1).expect("column 1 is not a double"));
        println!("2: {}", record.get_int(2).expect("column 2 is not an int"));
        println!("3: {}", record.get_string(3).expect("column 3 is not a string"));
    }
    println!("count = {}", row_count);
}
use std::fs::File;
use std::io::Write;
use rusoto_s3::{S3Client,S3,GetObjectRequest};
use futures::{Future, Stream};
use rusoto_core::{Region};
/// Downloads one object from S3 and saves it locally as `./result.pq`.
///
/// The bucket (`your-bucket`), key (`path/to/your/parquet`) and region
/// (`UsEast1`) are hard-coded placeholders. The request is issued through
/// `.sync()`, and the body stream is gathered with `concat2().wait()` before
/// being written out in a single `write_all` call.
///
/// # Panics
///
/// Panics if the GET request fails, if the response carries no body, if
/// collecting the body fails, or if the local file cannot be created or
/// written.
pub fn read_from_s3() {
    let client = S3Client::new(Region::UsEast1);

    let request = GetObjectRequest {
        bucket: String::from("your-bucket"),
        key: String::from("path/to/your/parquet"),
        ..Default::default()
    };

    let response = client
        .get_object(request)
        .sync()
        .expect("Couldn't GET object");

    // Collect the streamed body into one contiguous buffer before writing.
    let payload = response.body.unwrap().concat2().wait().unwrap();

    let mut output = File::create("./result.pq").expect("create failed");
    output.write_all(&payload).expect("failed to write body");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment