Skip to content

Instantly share code, notes, and snippets.

@jdevries3133
Last active August 20, 2021 02:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jdevries3133/acbb5ba2a19093d3bcc214733ef85e5a to your computer and use it in GitHub Desktop.
Save jdevries3133/acbb5ba2a19093d3bcc214733ef85e5a to your computer and use it in GitHub Desktop.

There is a ton of (very poorly written and garbled) code in here, but this is a summary of what I have tried in order to reproduce the corrupt file that jvoisin originally posted on the bpo. I've had no luck, so any ideas about what to do next are much appreciated.

What I have Tried

corruption techniques

  • remove first few bytes, which I thought would affect _Stream.getcomptype but it turns out that code branch isn't even being run in these cases?
  • insert random bytes into the middle, careful not to affect the last EOF byte
  • remove the EOF byte (this just causes EOFError; nothing exciting)

procedures

Every possible permutation of create, corrupt, compress, open, using all compression types.

I'm guessing that to be successful, I might ultimately need to have a more intimate knowledge of these file formats themselves to figure out if a corruption to a specific part of the file is what causes the problem. Who knows?

Analyzing jvoisin's Example

As for the example originally posted on the bpo, it definitely does reproduce the issue. At least, it worked perfectly on my machine. It did not matter what file extension I gave it; the same zlib error occurred every time (before my fix).

I did look at the hexdump of jvoisin's example, and it appears to be an uncompressed tar archive, and it also appears to be a small, trivial example with names like "foo" and "bar" in the tar file. At least that shows that a minimal example should be possible, I just cannot figure out how.

"""I am poking and prodding at tarfile.open, seeing what exceptions it gives
back from different types of invalid data."""
# note: i just realized I am misdefining "byte order mark" throughout.
# what I am trying to say is the first few bytes that hint at
# the file format, which tarfile lib looks for here:
# <https://github.com/python/cpython/blob/599f5c8481ca258ca3a5d13eaee7d07a9103b5f2/Lib/tarfile.py#L583>
import traceback
import random
import tarfile
from pathlib import Path
# When True, matrix() prints the full traceback of each caught exception
# (plus extra blank-line/rule separators); when False it prints only the
# exception type.
PRINT_TRACEBACKS = False
def bad_byte_order_mark(path=None, bytes_=None):
    """Return the input with its first five bytes chopped off.

    The data comes from ``bytes_`` when it is given (even if it is empty),
    otherwise from the file at ``path``.

    Args:
        path: File to read when ``bytes_`` is not supplied.
        bytes_: In-memory data to truncate; takes precedence over ``path``.

    Returns:
        The data starting at offset 5 (empty if the input is shorter).
    """
    # Compare against None rather than truthiness: an empty bytes object is a
    # legitimate input (e.g. the output of an earlier corruption step) and
    # must not fall through to open(None).
    if bytes_ is not None:
        return bytes_[5:]
    with open(path, 'rb') as f:
        f.seek(5)
        return f.read()
def bad_body(path=None, bytes_=None):
    """Randomly overwrite bytes in the body of the data, then drop all NULs.

    Roughly one fifth of the bytes are overwritten with random values in
    ``range(15, 256)``. Only offsets from 15 up to (but excluding) the last
    byte are eligible, so the leading bytes and the final byte are never
    overwritten. Afterwards every ``0x00`` byte is removed from the whole
    buffer, which can shorten the result.

    Args:
        path: File to read when ``bytes_`` is not supplied.
        bytes_: In-memory data to corrupt; takes precedence over ``path``.

    Returns:
        The corrupted data as ``bytes``.
    """
    # ``is not None`` so that an empty bytes object is still treated as input
    # instead of falling through to open(None).
    if bytes_ is not None:
        data = bytearray(bytes_)
    else:
        with open(path, 'rb') as f:
            data = bytearray(f.read())

    candidates = range(15, len(data) - 1)
    # random.sample raises ValueError when asked for more items than the
    # population holds, which happens for short inputs; clamp the count.
    count = min(len(data) // 5, len(candidates))
    for i in random.sample(candidates, count):
        data[i] = random.randrange(15, 256)
    return bytes(data.replace(b'\x00', b''))
def no_eof(path=None, bytes_=None):
    """Return the input with its final byte removed.

    Args:
        path: File to read when ``bytes_`` is not supplied.
        bytes_: In-memory data to truncate; takes precedence over ``path``.

    Returns:
        All but the last byte of the data (empty input stays empty).
    """
    # ``is not None`` so that empty bytes are handled here rather than
    # falling through to open(None).
    if bytes_ is not None:
        return bytes_[:-1]
    with open(path, 'rb') as f:
        return f.read()[:-1]
def _report_exception(exc):
    """Print the active traceback, or only the exception type, per PRINT_TRACEBACKS."""
    if PRINT_TRACEBACKS:
        traceback.print_exc()
    else:
        print(f'Exception type: {type(exc)}')


def _scenario_label(func):
    """Return a printable label for one entry of the corruption matrix."""
    if isinstance(func, tuple):
        return [f.__name__ for f in func]
    return func.__name__ if callable(func) else func


def matrix():
    """Run every fixture through every corruption scenario and report results.

    For each fixture under ``fixtures/`` and each scenario (no corruption, a
    single corruption function, or a tuple of functions applied in order), the
    resulting bytes are written to ``workdir/<fixture name>`` and handed to
    ``tarfile.open``. Any exception is caught and reported on stdout. Both
    directories are expected to exist already; nothing here creates them.
    """
    names = ['valid.tar.' + ext for ext in ('bz2', 'gz', 'lzma')] + ['original']
    files = [Path('fixtures/' + name) for name in names]
    scenarios = (
        None,
        no_eof, bad_byte_order_mark, bad_body,
        (bad_byte_order_mark, bad_body, no_eof),
        (bad_byte_order_mark, bad_body),
        (bad_byte_order_mark, no_eof),
        (bad_body, no_eof),
    )

    for file_path in files:
        for func in scenarios:
            tmp = Path(f'workdir/{file_path.name}')
            exc_occured = False
            if PRINT_TRACEBACKS:
                print('\n\n')
            print('-' * 80)

            if callable(func):
                # A single corruption function reads the fixture itself.
                tmp.write_bytes(func(file_path))
                try:
                    # Use the context manager so the archive handle is closed.
                    with tarfile.open(tmp):
                        pass
                except Exception as e:
                    exc_occured = True
                    _report_exception(e)
                    print(f'opening {file_path} with {func.__name__} caused an '
                          'exception')
            elif isinstance(func, tuple):
                # A tuple of functions: pipe the bytes through each in turn.
                # The loop variable is deliberately not named ``func`` so the
                # scenario is still intact for the summary line below.
                data = file_path.read_bytes()
                for corrupt in func:
                    data = corrupt(bytes_=data)
                tmp.write_bytes(data)
                try:
                    with tarfile.open(tmp):
                        pass
                except Exception as e:
                    exc_occured = True
                    _report_exception(e)
                    print(f'functions: {[f.__name__ for f in func]}, '
                          f'file: {file_path}')
            else:
                # The ``None`` scenario is the control: the fixture is copied
                # unmodified, so the valid archives should open cleanly.
                try:
                    tmp.write_bytes(file_path.read_bytes())
                    with tarfile.open(tmp):
                        pass
                except Exception as e:
                    exc_occured = True
                    _report_exception(e)
                    if 'original' in file_path.name:
                        print('Original sample from @jvoisin on the bpo thread '
                              'failed without maniuplation, as expected.')
                    else:
                        print(
                            f'Opening {file_path} without manipulation caused an exception '
                            'for some reason'
                        )

            print(f'Exception Occured: {exc_occured}')
            if PRINT_TRACEBACKS:
                print('-' * 80)
            if not exc_occured:
                print(f'No exception for {file_path} with func {_scenario_label(func)}')
# Run the corruption matrix only when this file is executed as a script.
if __name__ == "__main__":
    matrix()
--------------------------------------------------------------------------------
Exception Occured: False
No exception for fixtures/valid.tar.bz2 with func None
--------------------------------------------------------------------------------
Exception Occured: False
No exception for fixtures/valid.tar.bz2 with func no_eof
--------------------------------------------------------------------------------
Exception type: <class 'tarfile.ReadError'>
opening fixtures/valid.tar.bz2 with bad_byte_order_mark caused an exception
Exception Occured: True
--------------------------------------------------------------------------------
Exception type: <class 'tarfile.ReadError'>
opening fixtures/valid.tar.bz2 with bad_body caused an exception
Exception Occured: True
--------------------------------------------------------------------------------
Exception type: <class 'tarfile.ReadError'>
functions: ['bad_byte_order_mark', 'bad_body', 'no_eof'], file: fixtures/valid.tar.bz2
Exception Occured: True
--------------------------------------------------------------------------------
Exception type: <class 'tarfile.ReadError'>
functions: ['bad_byte_order_mark', 'bad_body'], file: fixtures/valid.tar.bz2
Exception Occured: True
--------------------------------------------------------------------------------
Exception type: <class 'tarfile.ReadError'>
functions: ['bad_byte_order_mark', 'no_eof'], file: fixtures/valid.tar.bz2
Exception Occured: True
--------------------------------------------------------------------------------
Exception type: <class 'tarfile.ReadError'>
functions: ['bad_body', 'no_eof'], file: fixtures/valid.tar.bz2
Exception Occured: True
--------------------------------------------------------------------------------
Exception Occured: False
No exception for fixtures/valid.tar.gz with func None
--------------------------------------------------------------------------------
Exception Occured: False
No exception for fixtures/valid.tar.gz with func no_eof
--------------------------------------------------------------------------------
Exception type: <class 'tarfile.ReadError'>
opening fixtures/valid.tar.gz with bad_byte_order_mark caused an exception
Exception Occured: True
--------------------------------------------------------------------------------
Exception type: <class 'EOFError'>
opening fixtures/valid.tar.gz with bad_body caused an exception
Exception Occured: True
--------------------------------------------------------------------------------
Exception type: <class 'tarfile.ReadError'>
functions: ['bad_byte_order_mark', 'bad_body', 'no_eof'], file: fixtures/valid.tar.gz
Exception Occured: True
--------------------------------------------------------------------------------
Exception type: <class 'tarfile.ReadError'>
functions: ['bad_byte_order_mark', 'bad_body'], file: fixtures/valid.tar.gz
Exception Occured: True
--------------------------------------------------------------------------------
Exception type: <class 'tarfile.ReadError'>
functions: ['bad_byte_order_mark', 'no_eof'], file: fixtures/valid.tar.gz
Exception Occured: True
--------------------------------------------------------------------------------
Exception type: <class 'EOFError'>
functions: ['bad_body', 'no_eof'], file: fixtures/valid.tar.gz
Exception Occured: True
--------------------------------------------------------------------------------
Exception Occured: False
No exception for fixtures/valid.tar.lzma with func None
--------------------------------------------------------------------------------
Exception Occured: False
No exception for fixtures/valid.tar.lzma with func no_eof
--------------------------------------------------------------------------------
Exception type: <class 'tarfile.ReadError'>
opening fixtures/valid.tar.lzma with bad_byte_order_mark caused an exception
Exception Occured: True
--------------------------------------------------------------------------------
Exception type: <class 'tarfile.ReadError'>
opening fixtures/valid.tar.lzma with bad_body caused an exception
Exception Occured: True
--------------------------------------------------------------------------------
Exception type: <class 'tarfile.ReadError'>
functions: ['bad_byte_order_mark', 'bad_body', 'no_eof'], file: fixtures/valid.tar.lzma
Exception Occured: True
--------------------------------------------------------------------------------
Exception type: <class 'tarfile.ReadError'>
functions: ['bad_byte_order_mark', 'bad_body'], file: fixtures/valid.tar.lzma
Exception Occured: True
--------------------------------------------------------------------------------
Exception type: <class 'tarfile.ReadError'>
functions: ['bad_byte_order_mark', 'no_eof'], file: fixtures/valid.tar.lzma
Exception Occured: True
--------------------------------------------------------------------------------
Exception type: <class 'tarfile.ReadError'>
functions: ['bad_body', 'no_eof'], file: fixtures/valid.tar.lzma
Exception Occured: True
--------------------------------------------------------------------------------
Exception type: <class 'zlib.error'>
Original sample from @jvoisin on the bpo thread failed without maniuplation, as expected.
Exception Occured: True
--------------------------------------------------------------------------------
Exception type: <class 'zlib.error'>
opening fixtures/original with no_eof caused an exception
Exception Occured: True
--------------------------------------------------------------------------------
Exception type: <class 'tarfile.ReadError'>
opening fixtures/original with bad_byte_order_mark caused an exception
Exception Occured: True
--------------------------------------------------------------------------------
Exception type: <class 'EOFError'>
opening fixtures/original with bad_body caused an exception
Exception Occured: True
--------------------------------------------------------------------------------
Exception type: <class 'tarfile.ReadError'>
functions: ['bad_byte_order_mark', 'bad_body', 'no_eof'], file: fixtures/original
Exception Occured: True
--------------------------------------------------------------------------------
Exception type: <class 'tarfile.ReadError'>
functions: ['bad_byte_order_mark', 'bad_body'], file: fixtures/original
Exception Occured: True
--------------------------------------------------------------------------------
Exception type: <class 'tarfile.ReadError'>
functions: ['bad_byte_order_mark', 'no_eof'], file: fixtures/original
Exception Occured: True
--------------------------------------------------------------------------------
Exception type: <class 'EOFError'>
functions: ['bad_body', 'no_eof'], file: fixtures/original
Exception Occured: True
"""This one takes a valid, uncompressed tar archive, corrupts it in various
ways, and also compresses it with each compression method before feeding
it into tarfile.open
This script currently has no output, but would produce output if an exception
other than a tarfile.TarError occurred."""
from copy import copy
import gzip
import bz2
import lzma
import itertools
import traceback
import random
import tarfile
# When True, do_the_thing() prints the traceback of any non-TarError
# exception; when False, print_stats() prints the exception message instead.
PRINT_EXC = False
def corrupt_start(bytes_):
    """Return ``bytes_`` with its first five bytes dropped."""
    skip = 5
    return bytes_[skip:]
def corrupt_body(bytes_):
    """Overwrite roughly a quarter of the bytes with random non-zero values.

    Only offsets from 10 up to (but excluding) the last five bytes are
    eligible, so the head and tail of the data are left untouched. The length
    of the data never changes.

    Args:
        bytes_: The data to corrupt.

    Returns:
        A new ``bytes`` object of the same length as ``bytes_``.
    """
    ar = bytearray(bytes_)
    candidates = range(10, len(ar) - 5)
    # random.sample raises ValueError when the requested count exceeds the
    # population, which happens for inputs shorter than 20 bytes; clamp it.
    count = min(len(ar) // 4, len(candidates))
    for i in random.sample(candidates, count):
        ar[i] = random.randrange(1, 256)
    return bytes(ar)
def corrupt_eof(bytes_):
    """Return ``bytes_`` without its final byte."""
    truncated = bytes_[:-1]
    return truncated
def print_stats(funcs, compression_meth_name, e: Exception):
    """Print a report header describing the run that raised ``e``.

    Emits a rule, the corruption functions and the compression method name;
    the exception message is added only when PRINT_EXC is off.
    """
    report = [
        '-' * 80,
        f'{funcs=}',
        f'{compression_meth_name=}',
    ]
    if not PRINT_EXC:
        report.append(f'Exception={e}')
    print('\n'.join(report))
def print_stats_footer():
    """Print the closing rule of a report followed by blank lines."""
    rule = '-' * 80
    print(rule, '\n\n', sep='\n')
def do_the_thing(funcs, compression_meth_name, compression, data):
    """Corrupt, optionally compress, then try to open ``data`` with tarfile.

    Each callable in ``funcs`` is applied to ``data`` in order (non-callable
    entries such as ``None`` are skipped), then ``compression`` is applied if
    it is callable. The result is written to ``workdir/file.tar`` and opened
    with ``tarfile.open``. ``tarfile.TarError`` is ignored; any other
    exception is reported via print_stats / print_stats_footer.
    """
    # Run the corruption steps, ignoring placeholder entries.
    for step in filter(callable, funcs):
        data = step(data)

    # Compress only when a real compressor was supplied.
    if callable(compression):
        data = compression(data)

    # Persist to the scratch file that tarfile will read.
    with open('workdir/file.tar', 'wb') as out:
        out.write(data)

    try:
        tarfile.open('workdir/file.tar').close()
    except tarfile.TarError:
        # The expected failure mode for corrupt archives; not interesting.
        pass
    except Exception as e:
        print_stats(funcs, compression_meth_name, e)
        if PRINT_EXC:
            traceback.print_exc()
        print_stats_footer()
def main():
    """Feed every corruption/compression combination of valid.tar to tarfile.

    Reads ``fixtures/valid.tar`` once, then for every permutation of zero to
    three entries of the corruption functions and every compression method,
    calls do_the_thing with the original, uncorrupted bytes.
    """
    # ``None`` is a no-op placeholder that do_the_thing skips.
    corruption_funcs = (None, corrupt_start, corrupt_body, corrupt_eof)
    # Built once up front instead of on every permutation.
    compressors = {
        'None': None,
        'gzip': gzip.compress,
        'bz2': bz2.compress,
        'lzma': lzma.compress,
    }

    with open('fixtures/valid.tar', 'rb') as f:
        original_data = f.read()

    # torture valid.tar
    for num_funcs in range(len(corruption_funcs)):
        # permutations() always yields tuples, so no wrapping is needed.
        for funcs in itertools.permutations(corruption_funcs, num_funcs):
            for comp_name, comp_func in compressors.items():
                # bytes are immutable, so the same object is safe to reuse.
                do_the_thing(funcs, comp_name, comp_func, original_data)
# Run the torture test only when this file is executed as a script.
if __name__ == "__main__":
    main()
It doesn't matter what you put in your fixtures folder.
For me, I just created some random data, like this:
thing
└── foo
├── bar
│   ├── arm.txt
│   └── biz
└── baz
└── biz
└── wow.py
5 directories, 2 files
Then, archive it three different ways:
tar -czvf fixtures/valid.tar.gz thing
tar -cvjSf fixtures/valid.tar.bz2 thing
tar -c --lzma -f fixtures/valid.tar.lzma thing
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment