Skip to content

Instantly share code, notes, and snippets.

@jurihock
Last active March 13, 2024 22:55
Show Gist options
  • Save jurihock/539fc26d2ef3d509f45b4a1d40639e96 to your computer and use it in GitHub Desktop.
Save jurihock/539fc26d2ef3d509f45b4a1d40639e96 to your computer and use it in GitHub Desktop.
Basic phase vocoder examples for time-scale and pitch-shifting modifications
# Time-scale modification example (TSM)
import numpy as np
from dhbw import dasp
from sdft import STFT
def princarg(x):
    '''Wraps normalized angles `x`, e.g. divided by 2π, to the interval [−0.5, +0.5).

    `x` may be a scalar or any array-like (list, tuple, ndarray); it is passed
    through `np.asarray` so that plain Python sequences work as well.
    The result has the same shape as `x`.'''
    # Shift by half a cycle before taking the remainder and shift back
    # afterwards, so the wrapped interval is centred on zero instead of [0, 1).
    return np.remainder(np.asarray(x) + 0.5, 1) - 0.5
def resample(x, q):
    '''Interpolates the 1D array `x` according to the scaling factor `q`
    by using the band-limited sinc interpolation method.

    `q` must be positive. For `q == 1` an unmodified copy of `x` is returned
    and the `resampy` package is not needed at all. Otherwise `x` is handed to
    `resampy.resample` with `q` as the original and 1 as the target sample
    rate (see the resampy documentation for the resulting output length).

    Raises an AssertionError if `q` is not positive or `x` is not 1D.'''
    assert q > 0, 'scaling factor q must be positive'
    if q == 1:
        return np.copy(x)
    x = np.atleast_1d(x)
    assert x.ndim == 1, 'x must be a 1D array'
    # Imported lazily: only the non-trivial path depends on the third-party
    # package, so the identity case keeps working without it.
    import resampy
    return resampy.resample(x, q, 1)
if __name__ == '__main__':
    # Time-scale modification: analyze with hop size A, resynthesize with
    # hop size S, and rescale the per-frame phase advance accordingly.

    overlap = 16
    timefactor = 2
    shiftpitch = False

    framesize = 2 * 1024
    hopsizeA = framesize // overlap
    # int() may truncate, so the effective stretch ratio is
    # hopsizeS / hopsizeA and not necessarily `timefactor`.
    hopsizeS = int(hopsizeA * timefactor)

    stft = STFT(framesize, hopsizeA, shift=True)
    istft = STFT(framesize, hopsizeS, shift=True)

    # load and analyze the input file 'x'
    x, _, sr = dasp.io.read('x')
    X = stft.stft(x)  # presumably (frames, bins) — confirm against sdft.STFT

    # bin centre frequencies scaled by the sample rate,
    # and the duration of one analysis / synthesis hop
    ω = np.fft.rfftfreq(framesize) * sr
    ΔtA = hopsizeA / sr
    ΔtS = hopsizeS / sr

    # preprocess phase values (normalized to cycles, i.e. divided by 2π)
    φA = np.angle(X) / (2 * np.pi)
    ΔφA = np.diff(φA, axis=0, prepend=0)

    # perform time scaling:
    # εA is the wrapped deviation of the measured phase advance from the
    # advance expected for each bin frequency; it is rescaled by the ratio
    # of the hop sizes actually used, so a truncated `hopsizeS` stays
    # consistent with the synthesis stage.
    εA = princarg(ΔφA - ω * ΔtA)
    εS = εA * (ΔtS / ΔtA)

    # postprocess phase values (accumulate and convert back to radians)
    ΔφS = εS + ω * ΔtS
    φS = np.cumsum(ΔφS, axis=0) * (2 * np.pi)

    # synthesize and save the output file 'y'
    Y = np.abs(X) * np.exp(1j * φS)
    y = istft.istft(Y)

    if shiftpitch:
        # resampling the stretched signal turns the time stretch
        # into a pitch shift
        y = resample(y, timefactor)

    dasp.io.write('y', y, sr)
    dasp.io.play('y')
# Pitch-shifting modification example (PSM)
import numpy as np
from dhbw import dasp
from sdft import STFT
def princarg(x):
    '''Wraps normalized angles `x`, e.g. divided by 2π, to the interval [−0.5, +0.5).

    `x` may be a scalar or any array-like (list, tuple, ndarray); it is passed
    through `np.asarray` so that plain Python sequences work as well.
    The result has the same shape as `x`.'''
    # Shift by half a cycle before taking the remainder and shift back
    # afterwards, so the wrapped interval is centred on zero instead of [0, 1).
    return np.remainder(np.asarray(x) + 0.5, 1) - 0.5
def resample(x, q):
    '''Interpolates the ND array `x` according to the scaling factor `q`
    by using the linear interpolation method.

    The last axis of `x` is stretched (`q > 1`) or compressed (`q < 1`) while
    the shape of `x` is preserved: output sample `i` is read from the source
    position `i * n / int(n * q)`, where `n` is the length of the last axis.
    Output samples that have no source position remain zero.

    `q` must be positive; for `q == 1` an unmodified copy of `x` is returned.
    For any other `q`, integer and boolean input is promoted to float64 so
    that interpolated values are not truncated; floating point and complex
    dtypes are kept.

    Raises an AssertionError if `q` is not positive.'''
    assert q > 0, 'scaling factor q must be positive'
    if q == 1:
        return np.copy(x)

    shape = np.shape(x)
    x = np.atleast_2d(x)

    # Interpolation yields fractional values, which an integer output
    # buffer would silently truncate on assignment.
    dtype = x.dtype if np.issubdtype(x.dtype, np.inexact) else np.float64
    y = np.zeros(x.shape, dtype=dtype)

    n = x.shape[-1]
    m = int(n * q)
    if n < 1 or m < 1:
        # nothing to read from, or the scaled length collapses to zero
        return np.reshape(y, shape)

    # destination indices and their fractional source positions
    i = np.arange(min(n, m))
    pos = i * n / m
    lo = np.trunc(pos).astype(int)
    frac = pos - lo

    # keep only positions inside the source range
    ok = (0 <= lo) & (lo < n)
    i, lo, frac = i[ok], lo[ok], frac[ok]

    # Clamp the right neighbour so that a position landing exactly on the
    # last sample copies that sample instead of being dropped.
    hi = np.minimum(lo + 1, n - 1)

    y[..., i] = frac * x[..., hi] + (1 - frac) * x[..., lo]
    return np.reshape(y, shape)
if __name__ == '__main__':
    # Pitch shifting at constant duration: the spectrum is resampled along
    # the frequency axis and the phases are rebuilt from the rescaled
    # instantaneous frequencies.

    # configuration
    framesize = 2 * 1024
    overlap = 4
    pitchfactor = 0.5
    hopsize = framesize // overlap

    # the same transform object serves for analysis and synthesis
    stft = STFT(framesize, hopsize)

    # load and analyze the input file 'x'
    x, _, sr = dasp.io.read('x')
    X = stft.stft(x)  # presumably (frames, bins) — confirm against sdft.STFT

    bin_freqs = np.fft.rfftfreq(framesize) * sr
    hop_time = hopsize / sr

    # phase per frame in cycles, and its advance from frame to frame
    phase_a = np.angle(X) / (2 * np.pi)
    phase_step_a = np.diff(phase_a, axis=0, prepend=0)

    # instantaneous frequency per bin: the bin frequency plus the wrapped
    # deviation of the measured phase advance, divided by the hop duration
    deviation_a = princarg(phase_step_a - bin_freqs * hop_time)
    inst_freq_a = deviation_a / hop_time + bin_freqs

    # move the frequency estimates along the bin axis and scale their values
    inst_freq_s = resample(inst_freq_a, pitchfactor) * pitchfactor

    # phase advance per hop for the shifted frequencies, accumulated over
    # the frames and converted back to radians
    phase_step_s = inst_freq_s * hop_time
    phase_s = np.cumsum(phase_step_s, axis=0) * (2 * np.pi)

    # magnitudes follow the same remapping; bins whose frequency is
    # non-positive or at/above half the sample rate are silenced
    mag_s = resample(np.abs(X), pitchfactor)
    out_of_band = np.logical_or(inst_freq_s <= 0, inst_freq_s >= sr / 2)
    mag_s[out_of_band] = 0

    # synthesize and save the output file 'y'
    Y = mag_s * np.exp(1j * phase_s)
    y = stft.istft(Y)

    dasp.io.write('y', y, sr)
    dasp.io.play('y')
# Combined pitch-shifting and time-scale modification example (PTM)
import numpy as np
from dhbw import dasp
from sdft import STFT
def princarg(x):
    '''Wraps normalized angles `x`, e.g. divided by 2π, to the interval [−0.5, +0.5).

    `x` may be a scalar or any array-like (list, tuple, ndarray); it is passed
    through `np.asarray` so that plain Python sequences work as well.
    The result has the same shape as `x`.'''
    # Shift by half a cycle before taking the remainder and shift back
    # afterwards, so the wrapped interval is centred on zero instead of [0, 1).
    return np.remainder(np.asarray(x) + 0.5, 1) - 0.5
def resample(x, q):
    '''Interpolates the ND array `x` according to the scaling factor `q`
    by using the linear interpolation method.

    The last axis of `x` is stretched (`q > 1`) or compressed (`q < 1`) while
    the shape of `x` is preserved: output sample `i` is read from the source
    position `i * n / int(n * q)`, where `n` is the length of the last axis.
    Output samples that have no source position remain zero.

    `q` must be positive; for `q == 1` an unmodified copy of `x` is returned.
    For any other `q`, integer and boolean input is promoted to float64 so
    that interpolated values are not truncated; floating point and complex
    dtypes are kept.

    Raises an AssertionError if `q` is not positive.'''
    assert q > 0, 'scaling factor q must be positive'
    if q == 1:
        return np.copy(x)

    shape = np.shape(x)
    x = np.atleast_2d(x)

    # Interpolation yields fractional values, which an integer output
    # buffer would silently truncate on assignment.
    dtype = x.dtype if np.issubdtype(x.dtype, np.inexact) else np.float64
    y = np.zeros(x.shape, dtype=dtype)

    n = x.shape[-1]
    m = int(n * q)
    if n < 1 or m < 1:
        # nothing to read from, or the scaled length collapses to zero
        return np.reshape(y, shape)

    # destination indices and their fractional source positions
    i = np.arange(min(n, m))
    pos = i * n / m
    lo = np.trunc(pos).astype(int)
    frac = pos - lo

    # keep only positions inside the source range
    ok = (0 <= lo) & (lo < n)
    i, lo, frac = i[ok], lo[ok], frac[ok]

    # Clamp the right neighbour so that a position landing exactly on the
    # last sample copies that sample instead of being dropped.
    hi = np.minimum(lo + 1, n - 1)

    y[..., i] = frac * x[..., hi] + (1 - frac) * x[..., lo]
    return np.reshape(y, shape)
if __name__ == '__main__':
    # Combined pitch shifting and time scaling: frequencies are remapped
    # along the bin axis, and the phases are rebuilt for the synthesis hop
    # size instead of the analysis hop size.

    # configuration
    framesize = 2 * 1024
    overlap = 16
    pitchfactor = 0.5
    timefactor = 2
    hopsizeA = framesize // overlap
    hopsizeS = int(hopsizeA * timefactor)

    # separate transform objects for the analysis and synthesis hop sizes
    stft = STFT(framesize, hopsizeA, shift=True)
    istft = STFT(framesize, hopsizeS, shift=True)

    # load and analyze the input file 'x'
    x, _, sr = dasp.io.read('x')
    X = stft.stft(x)  # presumably (frames, bins) — confirm against sdft.STFT

    bin_freqs = np.fft.rfftfreq(framesize) * sr
    hop_time_a = hopsizeA / sr
    hop_time_s = hopsizeS / sr

    # phase per frame in cycles, and its advance from frame to frame
    phase_a = np.angle(X) / (2 * np.pi)
    phase_step_a = np.diff(phase_a, axis=0, prepend=0)

    # instantaneous frequency per bin: the bin frequency plus the wrapped
    # deviation of the measured phase advance, divided by the analysis hop
    deviation_a = princarg(phase_step_a - bin_freqs * hop_time_a)
    inst_freq_a = deviation_a / hop_time_a + bin_freqs

    # move the frequency estimates along the bin axis and scale their values
    inst_freq_s = resample(inst_freq_a, pitchfactor) * pitchfactor

    # phase advance per synthesis hop for the shifted frequencies,
    # accumulated over the frames and converted back to radians
    phase_step_s = inst_freq_s * hop_time_s
    phase_s = np.cumsum(phase_step_s, axis=0) * (2 * np.pi)

    # magnitudes follow the same remapping; bins whose frequency is
    # non-positive or at/above half the sample rate are silenced
    mag_s = resample(np.abs(X), pitchfactor)
    out_of_band = np.logical_or(inst_freq_s <= 0, inst_freq_s >= sr / 2)
    mag_s[out_of_band] = 0

    # synthesize and save the output file 'y'
    Y = mag_s * np.exp(1j * phase_s)
    y = istft.istft(Y)

    dasp.io.write('y', y, sr)
    dasp.io.play('y')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment