trueroad/fix-corrupt-wav.py

## fix-corrupt-wav.py
#!/usr/bin/env python3
"""
Fix corrupt WAV.

https://gist.github.com/trueroad/99259cbd772d324299fad177e6dc5088

Copyright (C) 2023 Masamichi Hosoda.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

* Redistributions of source code must retain the above copyright notice,
  this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
"""

import sys
from typing import Optional

# Set to True by parse_chunk() whenever it rewrites a ckSize field or appends
# a padding byte; read by main() to report whether the input needed a fix.
b_fixed: bool = False


def parse_chunks(buff: bytearray, pointer: int, level: int, chunk_size: int
                 ) -> int:
    """Parse a run of sibling chunks and return the bytes they occupy.

    Chunks are parsed one after another with parse_chunk(), starting at
    `pointer`, until at least `chunk_size` bytes have been consumed or
    parse_chunk() reports the end of the buffer by returning 0.

    Args:
        buff: Whole file contents (may be modified by parse_chunk()).
        pointer: Offset in `buff` of the first sibling chunk.
        level: Nesting depth passed through to parse_chunk().
        chunk_size: Number of bytes the siblings are expected to span.

    Returns:
        Total number of bytes consumed by the parsed chunks.
    """
    consumed: int = 0
    while consumed < chunk_size:
        # The next sibling starts right after everything consumed so far.
        step: int = parse_chunk(buff, pointer + consumed, level, 0)
        if not step:
            # parse_chunk() returns 0 only at the end of the buffer.
            break
        consumed += step
    return consumed


def parse_chunk(buff: bytearray, pointer: int, level: int, chunk_size: int
                ) -> int:
    """Parse one chunk, repairing it in place where possible.

    Three chunk shapes are handled:

    * 'WAVE' / 'INFO': a bare four-byte tag with no size field, followed
      by subchunks that are parsed with parse_chunks().
    * 'RIFF' / 'LIST': tag and little-endian ckSize followed by exactly
      one nested chunk.  If ckSize differs from the parsed length of that
      nested chunk, ckSize is rewritten in `buff`.
    * anything else: tag, ckSize and ckSize bytes of payload.  If ckSize
      is odd and the payload ends exactly at the end of `buff`, the
      missing padding byte is appended.

    Every repair sets the module-level flag `b_fixed` to True.

    Args:
        buff: Whole file contents; modified in place when a fix is made.
        pointer: Offset in `buff` of the first byte of the chunk tag.
        level: Current nesting depth (logged, and limited to 4).
        chunk_size: Size announced by the enclosing chunk; only used for
            'WAVE' / 'INFO' to bound the subchunk scan.

    Returns:
        Number of bytes the chunk occupies, including tag, size field and
        padding byte, or 0 if `pointer` is exactly at the end of `buff`.

    Raises:
        RuntimeError: If the chunk header or payload extends beyond the
            end of `buff`, or if `level` is greater than 4.
    """
    global b_fixed

    if len(buff) == pointer:
        # EOF
        return 0
    if len(buff) < pointer:
        raise RuntimeError('Buffer overrun.')
    if level > 4:
        raise RuntimeError('Nesting level exceeds limit.')

    p: int = pointer
    if len(buff) < p + 4:
        # A truncated tag would otherwise surface as a bare IndexError.
        raise RuntimeError('Buffer overrun.')
    # latin-1 maps every byte to the code point of the same value, which
    # is exactly what chr() on each byte produces.
    ckID: str = buff[p:p + 4].decode('latin-1')
    print(f'*** Tag {ckID}, Nesting level {level}, Position 0x{p:x} ***')
    p += 4
    length: int = 4

    if ckID in ('WAVE', 'INFO'):
        # This chunk has subchunks.
        length += parse_chunks(buff, p, level + 1, chunk_size - 4)
        return length

    # This chunk has ckSize.
    if len(buff) < p + 4:
        # Slicing never raises, so a short size field must be rejected
        # explicitly instead of being decoded as a smaller number.
        raise RuntimeError('Buffer overrun.')
    ckSize: int = int.from_bytes(buff[p:p + 4], 'little')
    p += 4
    length += 4
    print(f'  ckSize: {ckSize} byte(s)')

    if ckID in ('RIFF', 'LIST'):
        # This chunk has one subchunk.
        length_subchunk: int = parse_chunk(buff, p, level + 1, ckSize)

        # Check ckSize.
        if ckSize != length_subchunk:
            # Fix ckSize.
            print(f'\nFix: Tag {ckID}, '
                  f'ckSize {ckSize} -> {length_subchunk} byte(s)')
            # Mask to 32 bits so an oversized length wraps instead of
            # making to_bytes() raise OverflowError.
            buff[p - 4:p] = (length_subchunk & 0xffffffff).to_bytes(
                4, 'little')
            b_fixed = True
        length += length_subchunk
    else:
        # This chunk does not have subchunks.
        p += ckSize
        length += ckSize
        if p > len(buff):
            # The payload is truncated.  Accepting it would let the
            # enclosing chunk be "fixed" to a size larger than the file.
            raise RuntimeError('Buffer overrun.')
        # Check word-aligned.
        if ckSize % 2 != 0:
            # Check file size.
            if p == len(buff):
                # Fix by appending a padding byte.
                print(f'\nFix: append a padding byte after {ckID}')
                buff.append(0)
                b_fixed = True
            length += 1

    return length


def main() -> None:
    """Check a WAV file for corruption and optionally write a fixed copy.

    Command line: ``fix-corrupt-wav.py INPUT.wav [OUTPUT.wav]``.

    With only INPUT.wav the file is checked: the process exits with
    status 1 if parse_chunk() had to fix something, and returns normally
    otherwise.  With OUTPUT.wav the buffer is written there whether or
    not a fix was needed.

    The process also exits with status 1 on a wrong argument count or
    when the input does not start with a RIFF header of form type WAVE.
    """
    print('Fix corrupt WAV\n\n'
          'https://gist.github.com/trueroad/'
          '99259cbd772d324299fad177e6dc5088\n\n'
          'Copyright (C) 2023 Masamichi Hosoda.\n'
          'All rights reserved.\n')

    if len(sys.argv) not in (2, 3):
        # INPUT.wav is mandatory; only OUTPUT.wav is optional.
        print('Usage: ./fix-corrupt-wav.py INPUT.wav [OUTPUT.wav]')
        sys.exit(1)

    filename_in: str = sys.argv[1]
    filename_out: Optional[str] = None
    if len(sys.argv) == 3:
        filename_out = sys.argv[2]

    print(f'Input  filename: {filename_in}\n'
          f'Output filename: {filename_out}\n')

    with open(filename_in, 'rb') as fin:
        buff: bytearray = bytearray(fin.read())

    # Slices never raise on short input, so files with fewer than
    # 12 bytes are rejected here instead of crashing with IndexError.
    if buff[0:4] != b'RIFF':
        print('Input is not RIFF.')
        sys.exit(1)
    if buff[8:12] != b'WAVE':
        print('Input is not WAVE.')
        sys.exit(1)

    parse_chunk(buff, 0, 0, 0)

    if filename_out is None:
        # Check-only mode: report through the message and exit status.
        if b_fixed:
            print('\nInput WAV is corrupted.')
            sys.exit(1)
        print('\nInput WAV is not corrupted.')
        return

    # Closing via `with` guarantees the data is flushed to disk.
    with open(filename_out, 'wb') as fout:
        fout.write(buff)
    if b_fixed:
        print('\nOutput WAV has been fixed.')
    else:
        print('\nNo need to fix.')


# Run the command-line tool only when executed as a script, not on import.
if __name__ == '__main__':
    main()
	#!/usr/bin/env python3
	"""
	Fix corrupt WAV.

	https://gist.github.com/trueroad/99259cbd772d324299fad177e6dc5088

	Copyright (C) 2023 Masamichi Hosoda.
	All rights reserved.

	Redistribution and use in source and binary forms, with or without
	modification, are permitted provided that the following conditions
	are met:

	* Redistributions of source code must retain the above copyright notice,
	this list of conditions and the following disclaimer.

	* Redistributions in binary form must reproduce the above copyright notice,
	this list of conditions and the following disclaimer in the documentation
	and/or other materials provided with the distribution.

	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
	AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	ARE DISCLAIMED.
	IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
	FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	SUCH DAMAGE.
	"""

	import sys
	from typing import Optional

	# Set to True by parse_chunk() whenever it rewrites a ckSize field or
	# appends a padding byte; read by main() to report whether a fix was made.
	b_fixed: bool = False


	def parse_chunks(buff: bytearray, pointer: int, level: int, chunk_size: int
	                 ) -> int:
	    """Parse a run of sibling chunks and return the bytes they occupy.

	    Chunks are parsed one after another with parse_chunk(), starting at
	    `pointer`, until at least `chunk_size` bytes have been consumed or
	    parse_chunk() reports the end of the buffer by returning 0.

	    Args:
	        buff: Whole file contents (may be modified by parse_chunk()).
	        pointer: Offset in `buff` of the first sibling chunk.
	        level: Nesting depth passed through to parse_chunk().
	        chunk_size: Number of bytes the siblings are expected to span.

	    Returns:
	        Total number of bytes consumed by the parsed chunks.
	    """
	    p: int = pointer
	    length: int = 0
	    while length < chunk_size:
	        length_subchunk: int = parse_chunk(buff, p, level, 0)
	        if length_subchunk == 0:
	            # EOF
	            break
	        p += length_subchunk
	        length += length_subchunk
	    return length


	def parse_chunk(buff: bytearray, pointer: int, level: int, chunk_size: int
	                ) -> int:
	    """Parse one chunk, repairing it in place where possible.

	    Three chunk shapes are handled:

	    * 'WAVE' / 'INFO': a bare four-byte tag with no size field, followed
	      by subchunks that are parsed with parse_chunks().
	    * 'RIFF' / 'LIST': tag and little-endian ckSize followed by exactly
	      one nested chunk.  If ckSize differs from the parsed length of that
	      nested chunk, ckSize is rewritten in `buff`.
	    * anything else: tag, ckSize and ckSize bytes of payload.  If ckSize
	      is odd and the payload ends exactly at the end of `buff`, the
	      missing padding byte is appended.

	    Every repair sets the module-level flag `b_fixed` to True.

	    Args:
	        buff: Whole file contents; modified in place when a fix is made.
	        pointer: Offset in `buff` of the first byte of the chunk tag.
	        level: Current nesting depth (logged, and limited to 4).
	        chunk_size: Size announced by the enclosing chunk; only used for
	            'WAVE' / 'INFO' to bound the subchunk scan.

	    Returns:
	        Number of bytes the chunk occupies, including tag, size field and
	        padding byte, or 0 if `pointer` is exactly at the end of `buff`.

	    Raises:
	        RuntimeError: If the chunk header or payload extends beyond the
	            end of `buff`, or if `level` is greater than 4.
	    """
	    global b_fixed

	    if len(buff) == pointer:
	        # EOF
	        return 0
	    if len(buff) < pointer:
	        raise RuntimeError('Buffer overrun.')
	    if level > 4:
	        raise RuntimeError('Nesting level exceeds limit.')

	    p: int = pointer
	    if len(buff) < p + 4:
	        # A truncated tag would otherwise surface as a bare IndexError.
	        raise RuntimeError('Buffer overrun.')
	    # latin-1 maps every byte to the code point of the same value, which
	    # is exactly what chr() on each byte produces.
	    ckID: str = buff[p:p + 4].decode('latin-1')
	    print(f'* Tag {ckID}, Nesting level {level}, Position 0x{p:x} *')
	    p += 4
	    length: int = 4

	    if ckID in ('WAVE', 'INFO'):
	        # This chunk has subchunks.
	        length += parse_chunks(buff, p, level + 1, chunk_size - 4)
	        return length

	    # This chunk has ckSize.
	    if len(buff) < p + 4:
	        # Slicing never raises, so a short size field must be rejected
	        # explicitly instead of being decoded as a smaller number.
	        raise RuntimeError('Buffer overrun.')
	    ckSize: int = int.from_bytes(buff[p:p + 4], 'little')
	    p += 4
	    length += 4
	    print(f' ckSize: {ckSize} byte(s)')

	    if ckID in ('RIFF', 'LIST'):
	        # This chunk has one subchunk.
	        length_subchunk: int = parse_chunk(buff, p, level + 1, ckSize)

	        # Check ckSize.
	        if ckSize != length_subchunk:
	            # Fix ckSize.
	            print(f'\nFix: Tag {ckID}, '
	                  f'ckSize {ckSize} -> {length_subchunk} byte(s)')
	            # Mask to 32 bits so an oversized length wraps instead of
	            # making to_bytes() raise OverflowError.
	            buff[p - 4:p] = (length_subchunk & 0xffffffff).to_bytes(
	                4, 'little')
	            b_fixed = True
	        length += length_subchunk
	    else:
	        # This chunk does not have subchunks.
	        p += ckSize
	        length += ckSize
	        if p > len(buff):
	            # The payload is truncated.  Accepting it would let the
	            # enclosing chunk be "fixed" to a size larger than the file.
	            raise RuntimeError('Buffer overrun.')
	        # Check word-aligned.
	        if ckSize % 2 != 0:
	            # Check file size.
	            if p == len(buff):
	                # Fix by appending a padding byte.
	                print(f'\nFix: append a padding byte after {ckID}')
	                buff.append(0)
	                b_fixed = True
	            length += 1

	    return length


	def main() -> None:
	    """Check a WAV file for corruption and optionally write a fixed copy.

	    Command line: ``fix-corrupt-wav.py INPUT.wav [OUTPUT.wav]``.

	    With only INPUT.wav the file is checked: the process exits with
	    status 1 if parse_chunk() had to fix something, and returns normally
	    otherwise.  With OUTPUT.wav the buffer is written there whether or
	    not a fix was needed.

	    The process also exits with status 1 on a wrong argument count or
	    when the input does not start with a RIFF header of form type WAVE.
	    """
	    print('Fix corrupt WAV\n\n'
	          'https://gist.github.com/trueroad/'
	          '99259cbd772d324299fad177e6dc5088\n\n'
	          'Copyright (C) 2023 Masamichi Hosoda.\n'
	          'All rights reserved.\n')

	    if len(sys.argv) not in (2, 3):
	        # INPUT.wav is mandatory; only OUTPUT.wav is optional.
	        print('Usage: ./fix-corrupt-wav.py INPUT.wav [OUTPUT.wav]')
	        sys.exit(1)

	    filename_in: str = sys.argv[1]
	    filename_out: Optional[str] = None
	    if len(sys.argv) == 3:
	        filename_out = sys.argv[2]

	    print(f'Input filename: {filename_in}\n'
	          f'Output filename: {filename_out}\n')

	    with open(filename_in, 'rb') as fin:
	        buff: bytearray = bytearray(fin.read())

	    # Slices never raise on short input, so files with fewer than
	    # 12 bytes are rejected here instead of crashing with IndexError.
	    if buff[0:4] != b'RIFF':
	        print('Input is not RIFF.')
	        sys.exit(1)
	    if buff[8:12] != b'WAVE':
	        print('Input is not WAVE.')
	        sys.exit(1)

	    parse_chunk(buff, 0, 0, 0)

	    if filename_out is None:
	        # Check-only mode: report through the message and exit status.
	        if b_fixed:
	            print('\nInput WAV is corrupted.')
	            sys.exit(1)
	        print('\nInput WAV is not corrupted.')
	        return

	    # Closing via `with` guarantees the data is flushed to disk.
	    with open(filename_out, 'wb') as fout:
	        fout.write(buff)
	    if b_fixed:
	        print('\nOutput WAV has been fixed.')
	    else:
	        print('\nNo need to fix.')


	# Run the command-line tool only when executed as a script, not on import.
	if __name__ == '__main__':
	    main()