Skip to content

Instantly share code, notes, and snippets.

@lifthrasiir
Last active December 10, 2022 11:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lifthrasiir/8f8f6aa027ba2a975bbdaa9665627c8e to your computer and use it in GitHub Desktop.
Save lifthrasiir/8f8f6aa027ba2a975bbdaa9665627c8e to your computer and use it in GitHub Desktop.
Async_zip + tokio issue with nested streams
# Cargo manifest for the reproduction binary.
[package]
name = "async-zip-problem"
version = "0.1.0"
edition = "2021"
# The binary target is built from main.rs (path given relative to this manifest)
# instead of the default src/main.rs location.
[[bin]]
name = "async-zip-problem"
path = "main.rs"
[dependencies]
# Default features are switched off; only the "deflate" feature is enabled.
async_zip = { version = "0.0.9", default-features = false, features = ["deflate"] }
# Runtime features requested: "rt", "rt-multi-thread", and "macros" (the program uses #[tokio::main]).
tokio = { version = "1.23.0", features = ["rt", "rt-multi-thread", "macros"] }
use std::error::Error;
use async_zip::{error::ZipError, read::stream::ZipFileReader};
// The following program fails with a valid ZIP file (problem.zip, which can be regenerated with mkproblem.py).
// This seems to be something to do with either async_zip or async-compression's buffer handling,
// and can be reproduced with the most recent version of tokio with multiple OSes and runtime flavors.
// It can be worked around by using `tokio::io::BufReader` every time `ZipFileReader` gets used.
/// Streams through the embedded `problem.zip` and reports every entry on stderr.
///
/// Entries whose file name ends in `.zip` are themselves read as a ZIP stream: their
/// members are listed inside a `name { ... }` group, and afterwards the enclosing entry's
/// CRC is checked via `compare_crc()`. Every other entry is printed as its name followed
/// by the length of the data returned by `read_to_end_crc()`.
///
/// # Errors
///
/// Propagates any error from the ZIP readers, and returns `ZipError::CRC32CheckError`
/// when `compare_crc()` reports a mismatch for a nested archive entry.
#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
    let archive = &include_bytes!("problem.zip")[..];
    let mut outer = ZipFileReader::new(archive);

    while let Some(mut entry) = outer.entry_reader().await? {
        let is_nested_zip = entry.entry().filename().ends_with(".zip");

        if !is_nested_zip {
            // Plain member: print the name first, then read it fully and print the length.
            eprint!("{}", entry.entry().filename());
            let contents = entry.read_to_end_crc().await?;
            eprintln!(" {}", contents.len());
            continue;
        }

        // Nested archive: open a second stream reader directly on top of the outer entry.
        eprintln!("{} {{", entry.entry().filename());
        let mut inner = ZipFileReader::new(&mut entry);
        while let Some(member) = inner.entry_reader().await? {
            eprint!(" {}", member.entry().filename());
            let contents = member.read_to_end_crc().await?;
            eprintln!(" {}", contents.len());
        }

        // The outer entry was consumed through `inner`; verify its checksum now.
        if !entry.compare_crc() {
            return Err(ZipError::CRC32CheckError.into());
        }
        eprintln!("}}");
    }

    Ok(())
}
from struct import pack
from binascii import crc32
def lfh(filename):
    """Return a ZIP local file header for ``filename`` (a ``bytes`` object).

    The fixed 26-byte prefix holds the signature ``50 4b 03 04``, version
    needed 20, general-purpose flags 0x0008 (CRC and sizes are supplied by a
    trailing data descriptor, so they are zero here), compression method 8
    (deflate) and a constant timestamp. It is followed by the little-endian
    16-bit file name length, a zero extra-field length, and the name itself.
    """
    fixed_fields = bytes.fromhex('504b0304 1400 0800 0800 0000 0215 00000000 00000000 00000000')
    name_length = pack('<H', len(filename))
    extra_length = bytes.fromhex('0000')
    return fixed_fields + name_length + extra_length + filename
def deflated(data, splits):
    """Encode ``data`` as DEFLATE stored blocks followed by a ZIP data descriptor.

    ``splits`` lists the payload size of each stored block, in order. Every
    size must be in 1..65535 and the sizes must add up to ``len(data)``;
    both conditions are enforced with assertions. Only the last block has
    its final-block flag set.

    The trailing data descriptor (signature ``50 4b 07 08``) records the
    CRC-32 of ``data``, the encoded size (payload plus 5 header bytes per
    block) and the original size.
    """
    blocks = []
    off = 0
    last_index = len(splits) - 1
    for i, sz in enumerate(splits):
        assert 1 <= sz <= 65535
        # Stored-block header: final flag byte, LEN, then LEN's one's complement.
        header = pack('<BHH', int(i == last_index), sz, 0xffff ^ sz)
        blocks.append(header + data[off:off+sz])
        off += sz
    assert off == len(data), (off, len(data))
    blocks.append(bytes.fromhex('504b0708') + pack('<III', crc32(data), len(data) + len(splits) * 5, len(data)))
    return b''.join(blocks)
def cdfh(filename, data, splits, offset):
    """Return a ZIP central directory file header for one archive member.

    ``filename`` and ``data`` are ``bytes``; ``splits`` is the list of
    stored-block sizes used when the member was encoded (only its length is
    used here, to compute the encoded size as ``len(data) + 5 * len(splits)``);
    ``offset`` is the position of the member's local file header.

    Layout: signature ``50 4b 01 02``, version fields, flags, method and
    timestamp, then CRC-32 / encoded size / original size / name length,
    then zeroed extra-length, comment-length, disk-number and
    internal-attribute fields, the external attributes ``20 00 a4 81``,
    the local header offset and finally the file name.
    """
    fixed_head = bytes.fromhex('504b0102 2d03 1400 0800 0800 0000 0215')
    sizes = pack('<IIIH', crc32(data), len(data) + len(splits) * 5, len(data), len(filename))
    fixed_tail = bytes.fromhex('0000 0000 0000 0000 2000a481')
    return fixed_head + sizes + fixed_tail + pack('<I', offset) + filename
def cd(files, offset):
    """Return a complete ZIP central directory plus end-of-central-directory record.

    ``files`` is a sequence of ``(filename, data, splits, offset)`` tuples,
    each passed unchanged to ``cdfh``. ``offset`` is the position at which
    the central directory starts within the archive.

    The end record (signature ``50 4b 05 06``) stores zero disk numbers,
    the entry count twice, the byte size of the directory, its start
    offset, and a zero comment length.
    """
    # Named `entries` so the local does not shadow this function's own name.
    entries = b''.join(cdfh(*file) for file in files)
    end_record = bytes.fromhex('504b0506 0000 0000') + \
        pack('<HHII', len(files), len(files), len(entries), offset) + bytes.fromhex('0000')
    return entries + end_record
# Members of the inner archive: name, payload, and the stored-block sizes for the payload.
aname = b'aaaaaaaaaaa.txt'
a = b'A' * 0x55b0
asplit = [0x55b0]

bname = b'bbbbbbbbbbb.txt'
b = b'B' * (0xfffb + 0x1f35)
bsplit = [0xfffb, 0x1f35]

# Members of the outer archive. The payload of ddddddddd.zip is the inner archive itself,
# so its final block size is appended below once the inner archive's length is known.
cname = b'ccccccccc.txt'
c = b'C' * 0x80
csplit = [0x80]

dname = b'ddddddddd.zip'
dsplit = [0x961f, 0x8000]

# Assemble the inner archive, recording each member's local header offset as it is added.
aoff = 0
inner = lfh(aname) + deflated(a, asplit)
boff = len(inner)
inner += lfh(bname) + deflated(b, bsplit)
inner_entries = [(aname, a, asplit, aoff), (bname, b, bsplit, boff)]
inner += cd(inner_entries, len(inner))

# Whatever the first two blocks do not cover goes into a third and last block.
remaining = len(inner) - sum(dsplit)
dsplit.append(remaining)

# Assemble the outer archive the same way, embedding the inner archive as a member.
coff = 0
outer = lfh(cname) + deflated(c, csplit)
doff = len(outer)
outer += lfh(dname) + deflated(inner, dsplit)
outer_entries = [(cname, c, csplit, coff), (dname, inner, dsplit, doff)]
outer += cd(outer_entries, len(outer))

with open('problem.zip', 'wb') as out:
    out.write(outer)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment