Last active
December 10, 2022 11:50
-
-
Save lifthrasiir/8f8f6aa027ba2a975bbdaa9665627c8e to your computer and use it in GitHub Desktop.
Async_zip + tokio issue with nested streams
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Manifest for the minimal reproduction; the single binary is built from main.rs
# located next to this file (no src/ directory).
[package]
name = "async-zip-problem"
version = "0.1.0"
edition = "2021"

[[bin]]
name = "async-zip-problem"
path = "main.rs"

[dependencies]
async_zip = { version = "0.0.9", default-features = false, features = ["deflate"] }
tokio = { version = "1.23.0", features = ["rt", "rt-multi-thread", "macros"] }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::error::Error; | |
use async_zip::{error::ZipError, read::stream::ZipFileReader}; | |
// The following program fails with a valid ZIP file (problem.py, can be regenerated with mkproblem.py). | |
// This seems to be something to do with either async_zip or async-compression's buffer handling, | |
// and can be reproduced with the most recent version of tokio with multiple OSes and runtime flavors. | |
// It can be worked around by using `tokio::io::BufReader` every time `ZipFileReader` gets used. | |
#[tokio::main] | |
async fn main() -> Result<(), Box<dyn Error>> { | |
let file = &include_bytes!("problem.zip")[..]; | |
let mut outer = ZipFileReader::new(file); | |
while let Some(mut f) = outer.entry_reader().await? { | |
if f.entry().filename().ends_with(".zip") { | |
eprintln!("{} {{", f.entry().filename()); | |
let mut inner = ZipFileReader::new(&mut f); | |
while let Some(ff) = inner.entry_reader().await? { | |
eprint!(" {}", ff.entry().filename()); | |
eprintln!(" {}", ff.read_to_end_crc().await?.len()); | |
} | |
if !f.compare_crc() { | |
return Err(ZipError::CRC32CheckError.into()); | |
} | |
eprintln!("}}"); | |
} else { | |
eprint!("{}", f.entry().filename()); | |
eprintln!(" {}", f.read_to_end_crc().await?.len()); | |
} | |
} | |
Ok(()) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from struct import pack
from binascii import crc32


def lfh(filename):
    """Build a ZIP local file header for `filename` (a bytes object).

    The fixed 26-byte prefix is the local-header signature ``PK\\x03\\x04`` followed
    by constant version/flag/method/time/date fields and zeroed CRC-32 and size
    fields; the real CRC and sizes are emitted after the data by `deflated()`.
    It is followed by the little-endian filename length, a zero extra-field
    length and the filename itself.

    Returns the header as bytes (30 + len(filename) bytes long).
    """
    return bytes.fromhex('504b0304 1400 0800 0800 0000 0215 00000000 00000000 00000000') + \
        pack('<H', len(filename)) + bytes.fromhex('0000') + filename
def deflated(data, splits):
    """Encode `data` as a sequence of blocks followed by a ZIP data descriptor.

    `splits` lists the payload size of each block, in order; the sizes must add
    up to exactly ``len(data)`` and each must be in ``1..65535``. Every block is
    a 5-byte header (a flag byte that is 1 only for the last block, the size,
    and the size's one's complement, both little-endian 16-bit) followed by the
    raw payload bytes, i.e. the layout of a DEFLATE "stored" block.

    The trailing data descriptor is the signature ``PK\\x07\\x08`` followed by the
    CRC-32 of `data`, the encoded size (payload plus 5 header bytes per block)
    and the raw size.

    Returns the concatenated bytes. Raises AssertionError when `splits` is empty,
    a size is out of range, or the sizes do not cover `data` exactly.
    """
    # With no blocks there is nothing carrying the final-block flag, so the output
    # would not be a decodable stream; fail loudly instead of emitting it.
    assert splits, 'at least one block is required'
    blocks = []
    off = 0
    for i, sz in enumerate(splits):
        assert 1 <= sz <= 65535
        # Header: final-block flag, LEN, NLEN (= LEN ^ 0xffff), then the payload slice.
        blocks.append(pack('<BHH', int(i == len(splits) - 1), sz, 0xffff ^ sz) + data[off:off+sz])
        off += sz
    assert off == len(data), (off, len(data))
    blocks.append(bytes.fromhex('504b0708') + pack('<III', crc32(data), len(data) + len(splits) * 5, len(data)))
    return b''.join(blocks)
def cdfh(filename, data, splits, offset):
    """Build a ZIP central directory file header for one entry.

    `filename` and `data` are bytes, `splits` is the block-size list that was
    passed to `deflated()` for this entry, and `offset` is the position of the
    entry's local file header within the archive.

    The record is the signature ``PK\\x01\\x02`` and constant version/flag/method/
    time/date fields, then the CRC-32 of `data`, the encoded size (payload plus
    5 bytes per block), the raw size and the filename length, then constant
    zeroed length/disk/attribute fields plus a fixed external-attribute word,
    the local header offset, and finally the filename.

    Returns the record as bytes (46 + len(filename) bytes long).
    """
    return bytes.fromhex('504b0102 2d03 1400 0800 0800 0000 0215') + \
        pack('<IIIH', crc32(data), len(data) + len(splits) * 5, len(data), len(filename)) + \
        bytes.fromhex('0000 0000 0000 0000 2000a481') + pack('<I', offset) + filename
def cd(files, offset):
    """Build the whole central directory plus the end-of-central-directory record.

    `files` is an iterable-with-length of ``(filename, data, splits, offset)``
    tuples, each forwarded positionally to `cdfh()`. `offset` is the position
    in the archive at which the central directory starts.

    The end record is the signature ``PK\\x05\\x06``, two zeroed disk-number
    fields, the entry count (written twice), the byte length of the central
    directory, its start offset, and a zero comment length.

    Returns the central directory records followed by the end record, as bytes.
    """
    # Named `records` rather than `cd` so the local does not shadow this function.
    records = b''.join(cdfh(*file) for file in files)
    return records + bytes.fromhex('504b0506 0000 0000') + \
        pack('<HHII', len(files), len(files), len(records), offset) + bytes.fromhex('0000')
# Entry definitions: name, contents, and the block sizes used to encode the contents.
# NOTE(review): the exact names and sizes presumably matter for triggering the
# reader issue this archive reproduces -- do not "tidy" them.
aname = b'aaaaaaaaaaa.txt'
a = b'A' * 0x55b0
asplit = [0x55b0]

bname = b'bbbbbbbbbbb.txt'
b = b'B' * (0xfffb + 0x1f35)
bsplit = [0xfffb, 0x1f35]

cname = b'ccccccccc.txt'
c = b'C' * 0x80
csplit = [0x80]

dname = b'ddddddddd.zip'
# Only the first two block sizes are fixed; the remainder of the inner archive
# is appended as a third block once its total length is known (see below).
dsplit = [0x961f, 0x8000]

# Inner archive: entries a and b followed by their central directory.
aoff = 0
inner = lfh(aname) + deflated(a, asplit)
boff = len(inner)
inner += lfh(bname) + deflated(b, bsplit)
inner += cd([(aname, a, asplit, aoff), (bname, b, bsplit, boff)], len(inner))
# Last block of d covers whatever the two fixed-size blocks leave over.
dsplit.append(len(inner) - sum(dsplit))

# Outer archive: entry c, then the inner archive as entry d, then the central directory.
coff = 0
outer = lfh(cname) + deflated(c, csplit)
doff = len(outer)
outer += lfh(dname) + deflated(inner, dsplit)
outer += cd([(cname, c, csplit, coff), (dname, inner, dsplit, doff)], len(outer))

with open('problem.zip', 'wb') as f:
    f.write(outer)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment