Last active
February 5, 2021 20:14
-
-
Save manojkarthick/076c5785e61be54b63c6dfb591a96bd2 to your computer and use it in GitHub Desktop.
Merging parquet files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//
// WARNING: This is not a complete executable program; it is for demonstration purposes only.
//
fn main() -> Result<(), CustomError> { | |
/// .. get input and output from the user | |
for input in inputs { | |
dump_contents(input, output)?; | |
} | |
Ok(()) | |
} | |
pub fn dump_contents(input: &str, output: &str) -> Result<(), CustomError> { | |
// .. open_file is a function that returns a fs::File | |
let input_file = open_file(input)?; | |
let output_file = File::create(output)?; | |
let reader = SerializedFileReader::new(input_file).unwrap(); | |
let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(reader)); | |
let mut arrow_batch_reader = arrow_reader.get_record_reader(1024).unwrap(); | |
let first_batch = arrow_batch_reader.next().unwrap().unwrap(); | |
let arrow_schema = first_batch.schema(); | |
debug!("{:#?}", &arrow_schema); | |
let mut writer = | |
ArrowWriter::try_new(output_file.try_clone().unwrap(), arrow_schema, None).unwrap(); | |
writer.write(&first_batch).unwrap(); | |
while let Some(batch) = arrow_batch_reader.next() { | |
let record_batch = batch.unwrap(); | |
writer.write(&record_batch).unwrap(); | |
} | |
writer.close().unwrap(); | |
// how to update FileMetaData at the end? | |
Ok(()) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment