Skip to content

Instantly share code, notes, and snippets.

@kylebarron
Last active May 19, 2022 20:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kylebarron/73d4a126499ef113add6ea1dca04e79f to your computer and use it in GitHub Desktop.
Save kylebarron/73d4a126499ef113add6ea1dca04e79f to your computer and use it in GitHub Desktop.
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "adler"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "alloc-no-stdlib"
version = "2.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35ef4730490ad1c4eae5c4325b2a95f521d023e5c885853ff7aca0a6a1631db3"
[[package]]
name = "alloc-stdlib"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "697ed7edc0f1711de49ce108c541623a0af97c6c60b2f6e2b65229847ac843c2"
dependencies = [
"alloc-no-stdlib",
]
[[package]]
name = "async-stream"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dad5c83079eae9969be7fadefe640a1c566901f05ff91ab221de4b6f68d9507e"
dependencies = [
"async-stream-impl",
"futures-core",
]
[[package]]
name = "async-stream-impl"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10f203db73a71dfa2fb6dd22763990fa26f3d2625a6da2da900d23b87d26be27"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "async-trait"
version = "0.1.53"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed6aa3524a2dfcf9fe180c51eae2b58738348d819517ceadf95789c51fff7600"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "bitpacking"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8c7d2ac73c167c06af4a5f37e6e59d84148d57ccbe4480b76f0273eefea82d7"
dependencies = [
"crunchy",
]
[[package]]
name = "brotli"
version = "3.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1a0b1dbcc8ae29329621f8d4f0d835787c1c38bb1401979b49d13b0b305ff68"
dependencies = [
"alloc-no-stdlib",
"alloc-stdlib",
"brotli-decompressor",
]
[[package]]
name = "brotli-decompressor"
version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59ad2d4653bf5ca36ae797b1f4bb4dbddb60ce49ca4aed8a2ce4829f60425b80"
dependencies = [
"alloc-no-stdlib",
"alloc-stdlib",
]
[[package]]
name = "cc"
version = "1.0.73"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
dependencies = [
"jobserver",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "crc32fast"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
dependencies = [
"cfg-if",
]
[[package]]
name = "crunchy"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
[[package]]
name = "fallible-streaming-iterator"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
[[package]]
name = "flate2"
version = "1.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b39522e96686d38f4bc984b9198e3a0613264abaebaff2c5c918bfa6b6da09af"
dependencies = [
"cfg-if",
"crc32fast",
"libc",
"miniz_oxide",
]
[[package]]
name = "futures"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f73fe65f54d1e12b726f517d3e2135ca3125a437b6d998caf1962961f7172d9e"
dependencies = [
"futures-channel",
"futures-core",
"futures-executor",
"futures-io",
"futures-sink",
"futures-task",
"futures-util",
]
[[package]]
name = "futures-channel"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010"
dependencies = [
"futures-core",
"futures-sink",
]
[[package]]
name = "futures-core"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3"
[[package]]
name = "futures-executor"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9420b90cfa29e327d0429f19be13e7ddb68fa1cccb09d65e5706b8c7a749b8a6"
dependencies = [
"futures-core",
"futures-task",
"futures-util",
]
[[package]]
name = "futures-io"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b"
[[package]]
name = "futures-macro"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33c1e13800337f4d4d7a316bf45a567dbcb6ffe087f16424852d97e97a91f512"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "futures-sink"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868"
[[package]]
name = "futures-task"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a"
[[package]]
name = "futures-util"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a"
dependencies = [
"futures-channel",
"futures-core",
"futures-io",
"futures-macro",
"futures-sink",
"futures-task",
"memchr",
"pin-project-lite",
"pin-utils",
"slab",
]
[[package]]
name = "integer-encoding"
version = "3.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e85a1509a128c855368e135cffcde7eac17d8e1083f41e2b98c58bc1a5074be"
dependencies = [
"async-trait",
"futures-util",
]
[[package]]
name = "jobserver"
version = "0.1.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa"
dependencies = [
"libc",
]
[[package]]
name = "libc"
version = "0.2.126"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"
[[package]]
name = "lz4"
version = "1.23.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4edcb94251b1c375c459e5abe9fb0168c1c826c3370172684844f8f3f8d1a885"
dependencies = [
"libc",
"lz4-sys",
]
[[package]]
name = "lz4-sys"
version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7be8908e2ed6f31c02db8a9fa962f03e36c53fbfde437363eae3306b85d7e17"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "miniz_oxide"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2b29bd4bc3f33391105ebee3589c19197c4271e3e5a9ec9bfe8127eeff8f082"
dependencies = [
"adler",
]
[[package]]
name = "parquet-format-async-temp"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "488c8b5f43521d019fade4bcc0ce88cce5da5fd26eb1d38b933807041f5930bf"
dependencies = [
"async-trait",
"futures",
"integer-encoding",
]
[[package]]
name = "parquet-metadata-demo"
version = "0.1.0"
dependencies = [
"parquet2",
]
[[package]]
name = "parquet2"
version = "0.12.0"
source = "git+https://github.com/jorgecarleitao/parquet2?branch=improve_meta_read#9427962ca7af01f99ccf5b960dc4bb3484ec9c3d"
dependencies = [
"async-stream",
"bitpacking",
"brotli",
"flate2",
"futures",
"lz4",
"parquet-format-async-temp",
"snap",
"streaming-decompression",
"xxhash-rust",
"zstd",
]
[[package]]
name = "pin-project-lite"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116"
[[package]]
name = "pin-utils"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "proc-macro2"
version = "1.0.39"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1"
dependencies = [
"proc-macro2",
]
[[package]]
name = "slab"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32"
[[package]]
name = "snap"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45456094d1983e2ee2a18fdfebce3189fa451699d0502cb8e3b49dba5ba41451"
[[package]]
name = "streaming-decompression"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9bc687acd5dc742c4a7094f2927a8614a68e4743ef682e7a2f9f0f711656cc92"
dependencies = [
"fallible-streaming-iterator",
]
[[package]]
name = "syn"
version = "1.0.95"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fbaf6116ab8924f39d52792136fb74fd60a80194cf1b1c6ffa6453eef1c3f942"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee"
[[package]]
name = "xxhash-rust"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "074914ea4eec286eb8d1fd745768504f420a1f7b7919185682a4a267bed7d2e7"
[[package]]
name = "zstd"
version = "0.11.2+zstd.1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4"
dependencies = [
"zstd-safe",
]
[[package]]
name = "zstd-safe"
version = "5.0.2+zstd.1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db"
dependencies = [
"libc",
"zstd-sys",
]
[[package]]
name = "zstd-sys"
version = "2.0.1+zstd.1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fd07cbbc53846d9145dbffdf6dd09a7a0aa52be46741825f5c97bdd4f73f12b"
dependencies = [
"cc",
"libc",
]
[package]
name = "parquet-metadata-demo"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
parquet2 = {git = "https://github.com/jorgecarleitao/parquet2", branch = "improve_meta_read"}
from io import BytesIO
import pyarrow as pa
import pyarrow.parquet as pq
def create_example_file_meta_data():
data = {
"str": pa.array(["a", "b", "c", "d"], type=pa.string()),
"uint8": pa.array([1, 2, 3, 4], type=pa.uint8()),
"int32": pa.array([0, -2147483638, 2147483637, 1], type=pa.int32()),
"bool": pa.array([True, True, False, False], type=pa.bool_()),
}
table = pa.table(data)
metadata_collector = []
pq.write_table(table, BytesIO(), metadata_collector=metadata_collector)
return table.schema, metadata_collector[0]
def main():
schema, meta = create_example_file_meta_data()
print('created collector')
metadata_collector = [meta] * 30_000
print('writing meta')
pq.write_metadata(schema, '_metadata', metadata_collector=metadata_collector)
if __name__ == '__main__':
main()
use std::{fs::File, time::Instant, io::BufReader};
use parquet2::read::read_metadata;
fn main() {
let mut file = BufReader::new(File::open("_metadata").unwrap());
let now = Instant::now();
let _ = read_metadata(&mut file).unwrap();
println!("Time to parse metadata: {}", now.elapsed().as_secs_f32());
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment