Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
**UPDATE** I'm now maintaining this Python module in a regular GitHub repository at https://github.com/WarrenWeckesser/wavio
# Author: Warren Weckesser
# License: BSD 3-Clause (http://opensource.org/licenses/BSD-3-Clause)
import wave
import numpy as np
def _wav2array(nchannels, sampwidth, data):
    """Convert the raw frame bytes of a wav file into a 2-d numpy array.

    Parameters
    ----------
    nchannels : int
        Number of interleaved channels in `data`.
    sampwidth : int
        Number of bytes per sample; must be 1, 2, 3 or 4.
    data : bytes
        The bytes read from the wav file (little-endian PCM samples).

    Returns
    -------
    numpy.ndarray
        Array with shape ``(num_samples, nchannels)``.  8 bit samples are
        returned as unsigned integers, 16 and 32 bit samples as signed
        integers of the same width, and 24 bit samples as sign-extended
        32 bit signed integers.

    Raises
    ------
    ValueError
        If `sampwidth` or `nchannels` is not positive, if the length of
        `data` is not a multiple of ``sampwidth * nchannels``, or if
        `sampwidth` is greater than 4.
    """
    # Guard against a zero divisor in the divmod below.
    if sampwidth < 1 or nchannels < 1:
        raise ValueError('sampwidth and nchannels must be positive.')
    num_samples, remainder = divmod(len(data), sampwidth * nchannels)
    if remainder > 0:
        raise ValueError('The length of data is not a multiple of '
                         'sampwidth * num_channels.')
    if sampwidth > 4:
        raise ValueError("sampwidth must not be greater than 4.")

    if sampwidth == 3:
        # numpy has no 3-byte integer type, so each sample is widened to
        # 4 bytes: copy the three data bytes into the low positions and
        # fill the fourth byte with the sign (0x00 or 0xFF).
        a = np.empty((num_samples, nchannels, 4), dtype=np.uint8)
        raw_bytes = np.frombuffer(data, dtype=np.uint8)
        a[:, :, :sampwidth] = raw_bytes.reshape(num_samples, nchannels,
                                                sampwidth)
        # The top bit of the most significant data byte is the sign bit.
        a[:, :, sampwidth:] = (a[:, :, sampwidth - 1:sampwidth] >> 7) * 255
        result = a.view('<i4').reshape(a.shape[:-1])
    else:
        # 8 bit samples are stored as unsigned ints; others as signed ints.
        dt_char = 'u' if sampwidth == 1 else 'i'
        a = np.frombuffer(data, dtype='<%s%d' % (dt_char, sampwidth))
        # frombuffer returns a read-only view of `data`; copy so that the
        # caller gets a writable array that owns its memory.
        result = a.reshape(num_samples, nchannels).copy()
    return result
def readwav(file):
    """Read a wav file.

    Returns the frame rate, sample width (in bytes) and a numpy array
    containing the data.

    This function does not read compressed wav files.

    Parameters
    ----------
    file : str or file-like object
        Passed directly to ``wave.open``.

    Returns
    -------
    rate : int
        The frame rate reported by the file.
    sampwidth : int
        The sample width in bytes.
    array : numpy.ndarray
        The samples, as produced by ``_wav2array``; one row per frame and
        one column per channel.
    """
    # The context manager closes the reader even if a header query or the
    # frame read raises.
    with wave.open(file, 'rb') as wav:
        rate = wav.getframerate()
        nchannels = wav.getnchannels()
        sampwidth = wav.getsampwidth()
        nframes = wav.getnframes()
        data = wav.readframes(nframes)
    array = _wav2array(nchannels, sampwidth, data)
    return rate, sampwidth, array
def writewav24(filename, rate, data):
    """Create a 24 bit wav file.

    data must be "array-like", either 1- or 2-dimensional.  If it is 2-d,
    the rows are the frames (i.e. samples) and the columns are the channels.

    The data is assumed to be signed, and the values are assumed to be
    within the range of a 24 bit integer.  Floating point values are
    converted to integers.  The data is not rescaled or normalized before
    writing it to the file.

    Parameters
    ----------
    filename : str or file-like object
        Passed directly to ``wave.open`` in ``'wb'`` mode.
    rate : int
        Frame rate (samples per second) stored in the file header.
    data : array-like
        The samples to write.

    Raises
    ------
    ValueError
        If `data` is not 1- or 2-dimensional.

    Example: Create a 3 second 440 Hz sine wave.

    >>> rate = 22050  # samples per second
    >>> T = 3         # sample duration (seconds)
    >>> f = 440.0     # sound frequency (Hz)
    >>> t = np.linspace(0, T, T*rate, endpoint=False)
    >>> x = (2**23 - 1) * np.sin(2 * np.pi * f * t)
    >>> writewav24("sine24.wav", rate, x)
    """
    a32 = np.asarray(data, dtype=np.int32)
    if a32.ndim == 1:
        # Convert to a 2D array with a single column.  Indexing creates a
        # view, so an int32 array passed by the caller keeps its shape.
        a32 = a32[:, np.newaxis]
    elif a32.ndim != 2:
        raise ValueError('data must be 1- or 2-dimensional.')
    # By shifting first 0 bits, then 8, then 16, the resulting output
    # is 24 bit little-endian.
    a8 = (a32[:, :, np.newaxis] >> np.array([0, 8, 16])) & 255
    wavdata = a8.astype(np.uint8).tobytes()

    # The context manager finalizes the header and closes the file even if
    # one of the writer calls raises.
    with wave.open(filename, 'wb') as w:
        w.setnchannels(a32.shape[1])
        w.setsampwidth(3)
        w.setframerate(rate)
        w.writeframes(wavdata)

This comment has been minimized.

Copy link

@yPhil-gh yPhil-gh commented Oct 28, 2014

Hi Warren, right now to plot audio data with matplotlib I do this

from matplotlib.figure import Figure
from matplotlib.backends.backend_gtkagg import FigureCanvasGTKAgg as FigureCanvas
import scipy.io.wavfile as wavfile

import wave
import numpy as np
        # rate, data = wavfile.read(open(filename, 'r'), True)

And you are right, it does not read 24 bit PCM. (But it does 16)

Now if I try and use your func and do this

        rate, data = readwav(filename)

I get a nasty "ValueError: too many values to unpack"

And if I shadow that one extra return val:

        rate, data, array = readwav(filename)

I get a "TypeError: object of type 'int' has no len()" :(

Please, can you tell me what I am doing wrong? Thanks for your patience reading me



This comment has been minimized.

Copy link
Owner Author

@WarrenWeckesser WarrenWeckesser commented Nov 9, 2014

@xaccrocheur: I just discovered your comment, so you might have figured this out by now. As explained in its docstring, the function readwav in this gist returns three values: rate, sampwidth and array. So you should use it like

rate, sampwidth, data = readwav(filename)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.