Last active
March 13, 2024 22:55
-
-
Save jurihock/539fc26d2ef3d509f45b4a1d40639e96 to your computer and use it in GitHub Desktop.
Basic phase vocoder examples for time-scale and pitch-shifting modifications
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Time-scale modification example (TSM)
import numpy as np | |
from dhbw import dasp | |
from sdft import STFT | |
def princarg(x):
    '''Maps normalized angles `x` (i.e. angles already divided by 2π)
    onto their principal value in the half-open interval [−0.5, +0.5).'''
    shifted = x + 0.5
    return np.mod(shifted, 1) - 0.5
def resample(x, q):
    '''Resamples the 1D array `x` according to the scaling factor `q`
    by using the band-limited sinc interpolation method (`resampy`).

    The factor `q` is handed to `resampy.resample` as the original sample
    rate and 1 as the target sample rate.

    For `q == 1` an unmodified copy of `x` is returned; `resampy` is not
    required in that case, since it is only imported when actually used.

    Raises an AssertionError if `q` is not positive or, for `q != 1`,
    if `x` has more than one dimension.'''
    assert q > 0, 'scaling factor must be positive'
    if q == 1:
        return np.copy(x)
    # deferred import: the optional dependency is only needed from here on
    import resampy
    x = np.atleast_1d(x)
    assert x.ndim == 1, 'only 1D input is supported'
    return resampy.resample(x, q, 1)
if __name__ == '__main__':

    # Time-scale modification: the input is analyzed with the hop size
    # `hopsizeA` and synthesized with the hop size `hopsizeS`, while the
    # phase increments are rescaled to the synthesis hop.

    overlap = 16
    timefactor = 2
    shiftpitch = False

    framesize = 2 * 1024
    hopsizeA = framesize // overlap
    hopsizeS = int(hopsizeA * timefactor)  # truncated to whole samples

    stft = STFT(framesize, hopsizeA, shift=True)
    istft = STFT(framesize, hopsizeS, shift=True)

    # load and analyze the input file 'x'
    x, _, sr = dasp.io.read('x')
    X = stft.stft(x)

    # bin frequencies and hop durations
    # (presumably Hz and seconds, if `sr` is given in Hz)
    ω = np.fft.rfftfreq(framesize) * sr
    ΔtA = hopsizeA / sr
    ΔtS = hopsizeS / sr

    # preprocess phase values:
    # normalize by 2π and differentiate along axis 0
    # (presumably the frame axis of the spectrogram)
    φA = np.angle(X) / (2 * np.pi)
    ΔφA = np.diff(φA, axis=0, prepend=0)

    # perform time scaling:
    # the phase deviation is scaled by the hop ratio actually in effect,
    # which differs from `timefactor` whenever `hopsizeS` got truncated
    εA = princarg(ΔφA - ω * ΔtA)
    εS = εA * (hopsizeS / hopsizeA)  # = εA * (ΔtS / ΔtA)

    # postprocess phase values:
    # add the expected phase advance back and accumulate along axis 0
    ΔφS = εS + ω * ΔtS
    φS = np.cumsum(ΔφS, axis=0) * (2 * np.pi)

    # synthesize and save the output file 'y'
    Y = np.abs(X) * np.exp(1j * φS)
    y = istft.istft(Y)

    # optionally resample the time-scaled signal to shift its pitch
    if shiftpitch:
        y = resample(y, timefactor)

    dasp.io.write('y', y, sr)
    dasp.io.play('y')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pitch-shifting modification example (PSM)
import numpy as np | |
from dhbw import dasp | |
from sdft import STFT | |
def princarg(x):
    '''Returns the principal value of the normalized angles `x`
    (angles divided by 2π), which lies in the interval [−0.5, +0.5).'''
    wrapped = np.mod(x + 0.5, 1)
    return wrapped - 0.5
def resample(x, q):
    '''Rescales the last axis of the ND array `x` by the factor `q`
    by means of linear interpolation.

    The result keeps the shape of `x`; destination bins that receive
    no interpolated value stay zero. For `q == 1` a copy is returned.'''
    assert q > 0
    if q == 1:
        return np.copy(x)
    shape = np.shape(x)
    src = np.atleast_2d(x)
    dst = np.zeros_like(src)
    n = src.shape[-1]
    m = int(n * q)
    # destination bins and the fractional source positions they read from
    bins = np.arange(min(n, m))
    positions = bins * n / m
    lower = np.trunc(positions).astype(int)
    weights = positions - lower
    # only bins whose upper source neighbour exists can be interpolated
    valid = (lower >= 0) & (lower < n - 1)
    bins = bins[valid]
    lower = lower[valid]
    weights = weights[valid]
    dst[..., bins] = weights * src[..., lower + 1] + (1 - weights) * src[..., lower]
    return np.reshape(dst, shape)
if __name__ == '__main__':

    # Pitch shifting: instantaneous frequencies and magnitudes are
    # resampled by `pitchfactor`, analysis and synthesis share one hop size.

    pitchfactor = 0.5

    framesize = 2 * 1024
    overlap = 4
    hopsize = framesize // overlap

    stft = STFT(framesize, hopsize)

    # load and analyze the input file 'x'
    x, _, sr = dasp.io.read('x')
    X = stft.stft(x)

    twopi = 2 * np.pi
    nyquist = sr / 2

    # bin frequencies and hop duration
    ω = np.fft.rfftfreq(framesize) * sr
    Δt = hopsize / sr

    # preprocess phase values:
    # normalize by 2π and differentiate along axis 0
    φA = np.angle(X) / twopi
    ΔφA = np.diff(φA, axis=0, prepend=0)

    # manipulate instantaneous frequencies:
    # estimate them per bin, then move and scale them by the pitch factor
    εA = princarg(ΔφA - ω * Δt)
    λA = εA / Δt + ω  # = (εA + ω * Δt) / Δt
    λS = resample(λA, pitchfactor) * pitchfactor
    εS = λS * Δt  # = λS * Δt - ω * Δt

    # postprocess phase values:
    # accumulate the phase increments along axis 0
    ΔφS = εS  # = εS + ω * Δt
    φS = np.cumsum(ΔφS, axis=0) * twopi

    # manipulate magnitudes:
    # move them like the frequencies and mute bins
    # whose frequency is not inside (0, sr / 2)
    rA = np.abs(X)
    rS = resample(rA, pitchfactor)
    muted = (λS <= 0) | (λS >= nyquist)
    rS[muted] = 0

    # synthesize and save the output file 'y'
    Y = rS * np.exp(1j * φS)
    y = stft.istft(Y)

    dasp.io.write('y', y, sr)
    dasp.io.play('y')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Combined pitch-shifting and time-scale modification example (PTM)
import numpy as np | |
from dhbw import dasp | |
from sdft import STFT | |
def princarg(x):
    '''Wraps the normalized angles `x`, i.e. angles expressed as
    fractions of 2π, back into the interval [−0.5, +0.5).'''
    half = 0.5
    return np.mod(x + half, 1) - half
def resample(x, q):
    '''Linearly interpolates the ND array `x` along its last axis
    according to the scaling factor `q`.

    The output has the shape of the input; entries of the last axis
    that are not written by the interpolation remain zero.
    A plain copy of `x` is returned for `q == 1`.'''
    assert q > 0
    if q == 1:
        return np.copy(x)
    original_shape = np.shape(x)
    source = np.atleast_2d(x)
    target = np.zeros_like(source)
    n = source.shape[-1]
    m = int(n * q)
    # target indices along the last axis and where they point in the source
    idx = np.arange(min(n, m))
    pos = idx * n / m
    left = np.trunc(pos).astype(int)
    frac = pos - left
    # drop indices without a right-hand source neighbour
    keep = (left >= 0) & (left < n - 1)
    idx, left, frac = idx[keep], left[keep], frac[keep]
    right = left + 1
    target[..., idx] = frac * source[..., right] + (1 - frac) * source[..., left]
    return np.reshape(target, original_shape)
if __name__ == '__main__':

    # Combined modification: instantaneous frequencies and magnitudes are
    # resampled by `pitchfactor`, while the synthesis hop size differs
    # from the analysis hop size by `timefactor`.

    pitchfactor = 0.5
    timefactor = 2

    framesize = 2 * 1024
    overlap = 16
    hopsizeA = framesize // overlap
    hopsizeS = int(hopsizeA * timefactor)

    stft = STFT(framesize, hopsizeA, shift=True)
    istft = STFT(framesize, hopsizeS, shift=True)

    # load and analyze the input file 'x'
    x, _, sr = dasp.io.read('x')
    X = stft.stft(x)

    twopi = 2 * np.pi
    nyquist = sr / 2

    # bin frequencies and hop durations
    ω = np.fft.rfftfreq(framesize) * sr
    ΔtA = hopsizeA / sr
    ΔtS = hopsizeS / sr

    # preprocess phase values:
    # normalize by 2π and differentiate along axis 0
    φA = np.angle(X) / twopi
    ΔφA = np.diff(φA, axis=0, prepend=0)

    # manipulate instantaneous frequencies:
    # estimate them with the analysis hop, then move and scale them
    # by the pitch factor and convert back with the synthesis hop
    εA = princarg(ΔφA - ω * ΔtA)
    λA = εA / ΔtA + ω  # = (εA + ω * ΔtA) / ΔtA
    λS = resample(λA, pitchfactor) * pitchfactor
    εS = λS * ΔtS  # = λS * ΔtS - ω * ΔtS

    # postprocess phase values:
    # accumulate the phase increments along axis 0
    ΔφS = εS  # = εS + ω * ΔtS
    φS = np.cumsum(ΔφS, axis=0) * twopi

    # manipulate magnitudes:
    # move them like the frequencies and mute bins
    # whose frequency is not inside (0, sr / 2)
    rA = np.abs(X)
    rS = resample(rA, pitchfactor)
    muted = (λS <= 0) | (λS >= nyquist)
    rS[muted] = 0

    # synthesize and save the output file 'y'
    Y = rS * np.exp(1j * φS)
    y = istft.istft(Y)

    dasp.io.write('y', y, sr)
    dasp.io.play('y')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment