Skip to content

Instantly share code, notes, and snippets.

@tam17aki
Last active January 14, 2020 07:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tam17aki/e2f39883b35dd8284c7064b9d3ecf067 to your computer and use it in GitHub Desktop.
Save tam17aki/e2f39883b35dd8284c7064b9d3ecf067 to your computer and use it in GitHub Desktop.
A Python wrapper, using the oct2py package, for the demo script of the ILRMA method
import os

import numpy as np
from oct2py import octave
from scipy import signal
# Demo script: mix two source recordings, separate them with the Octave
# implementation of ILRMA (called through oct2py), and write the observed,
# original and estimated signals to WAV files.

# Path to the directory containing the ILRMA Octave/MATLAB code
# (must be edited to point at a real checkout).
path_to_ILRMA = '/path/to/ILRMA'
octave.addpath(path_to_ILRMA)

# Set parameters
seed = 1  # pseudo random seed
refMic = 1  # reference microphone for back projection (1-based, as in Octave)
fsResample = 16000  # resampling frequency [Hz]
ns = 2  # number of sources
fftSize = 4096  # window length in STFT [points]
shiftSize = 2048  # shift length in STFT [points]
# number of bases (for type=1, nb is # of bases for "each" source;
# for type=2, nb is # of bases for "all" sources)
nb = 10
# number of iterations (define by checking convergence
# behavior with drawConv=true)
it = 100
# 1 or 2 (1: ILRMA w/o partitioning function,
# 2: ILRMA with partitioning function)
# Named ilrma_type rather than `type` so the Python builtin is not shadowed.
ilrma_type = 1
# true or false (true: plot cost function values in each iteration
# and show convergence behavior, false: faster and
# do not plot cost function values)
drawConv = octave.logical(1)
# true or false (true: apply normalization in each iteration
# of ILRMA to improve numerical stability, but the monotonic
# decrease of the cost function may be lost. false: do not apply normalization)
normalize = octave.logical(1)

# Fix random seed (Octave-side generator)
octave.rand('seed', seed)

# Read the source images. Both files are assumed to share the same length,
# channel count and sampling rate; `fs` keeps the value of the last file read.
sig_src1, fs = octave.audioread(path_to_ILRMA + '/input/drums.wav', nout=2)
sig_src2, fs = octave.audioread(path_to_ILRMA + '/input/piano.wav', nout=2)
# signal x channel x source (source image)
sig = np.stack([sig_src1, sig_src2], axis=2)
del sig_src1, sig_src2

# Resampling for reducing computational cost. resample_poly filters along
# axis 0 independently for every (channel, source) pair, so one call on the
# 3-D array is equivalent to resampling each source separately.
sig_resample = signal.resample_poly(sig, fsResample, fs, axis=0)
del sig

# Observed mixture (signal x channel): at each of the first `ns` microphones,
# add up the images of all sources.
mix = sig_resample[:, :ns, :].sum(axis=2)

sep, cost = octave.bss_ILRMA(mix, ns, nb, fftSize, shiftSize, it, ilrma_type,
                             refMic, drawConv, normalize, nout=2)

outputDir = './output'
os.makedirs(outputDir, exist_ok=True)

# (file name stem, signal) pairs, written in this order.
outputs = [
    # observed signal
    ('observedMixture', mix),
    # source signals as seen at the reference microphone
    ('originalSource1', sig_resample[:, refMic - 1, 0]),
    ('originalSource2', sig_resample[:, refMic - 1, 1]),
    # estimated signals
    ('estimatedSignal1', sep[:, 0]),
    ('estimatedSignal2', sep[:, 1]),
]
for stem, data in outputs:
    octave.audiowrite('{}/{}.wav'.format(outputDir, stem), data, fsResample)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment