Skip to content

Instantly share code, notes, and snippets.

@manojkarthick
Last active February 5, 2021 20:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save manojkarthick/076c5785e61be54b63c6dfb591a96bd2 to your computer and use it in GitHub Desktop.
Save manojkarthick/076c5785e61be54b63c6dfb591a96bd2 to your computer and use it in GitHub Desktop.
Merging parquet files
//
// WARNING: This is not a complete executable program; it is for demonstration purposes only.
//
fn main() -> Result<(), CustomError> {
/// .. get input and output from the user
for input in inputs {
dump_contents(input, output)?;
}
Ok(())
}
/// Copies every record batch of the parquet file at `input` into a freshly
/// created parquet file at `output`.
///
/// The schema of the output file is taken from the first record batch read
/// from `input`. `output` is opened with `File::create`, so any existing file
/// at that path is replaced on every call.
///
/// # Errors
///
/// Returns a `CustomError` when `open_file(input)` or `File::create(output)`
/// fails.
///
/// # Panics
///
/// Panics when the input cannot be read as parquet, when it yields no record
/// batch at all, or when writing to / closing the output fails.
///
/// NOTE(review): these panics could be propagated with `?` instead, provided
/// `CustomError` converts from the parquet and arrow error types -- confirm
/// before changing.
pub fn dump_contents(input: &str, output: &str) -> Result<(), CustomError> {
    // .. open_file is a function that returns a fs::File
    let input_file = open_file(input)?;
    let output_file = File::create(output)?;

    let reader =
        SerializedFileReader::new(input_file).expect("failed to open input as a parquet file");
    let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(reader));
    // 1024 is the batch size handed to the record reader.
    let mut arrow_batch_reader = arrow_reader
        .get_record_reader(1024)
        .expect("failed to create a record batch reader for the input");

    // The writer needs a schema up front, so the first batch is pulled out
    // ahead of the main loop and its schema is reused for the output file.
    let first_batch = arrow_batch_reader
        .next()
        .expect("input parquet file yielded no record batches")
        .expect("failed to read the first record batch");
    let arrow_schema = first_batch.schema();
    debug!("{:#?}", &arrow_schema);

    // `output_file` is not used again, so it is moved into the writer instead
    // of being duplicated with `try_clone()`.
    let mut writer = ArrowWriter::try_new(output_file, arrow_schema, None)
        .expect("failed to create the parquet writer");
    writer
        .write(&first_batch)
        .expect("failed to write the first record batch");

    // The reader is an iterator; this drains whatever follows the first batch.
    for batch in arrow_batch_reader {
        let record_batch = batch.expect("failed to read a record batch");
        writer
            .write(&record_batch)
            .expect("failed to write a record batch");
    }

    writer.close().expect("failed to close the parquet writer");
    // how to update FileMetaData at the end?
    Ok(())
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment