# tam17aki/demo_ILRMA.py

## demo_ILRMA.py
"""Demo: run the Octave ``bss_ILRMA`` routine from Python through oct2py.

The script loads two multichannel source images (drums and piano) from the
ILRMA package's ``input`` directory, resamples them, sums them per channel
to build the observed mixture, calls ``bss_ILRMA`` to separate the mixture,
and writes the mixture, the reference sources and the separated signals as
WAV files into ``./output``.

Set ``path_to_ILRMA`` to the local checkout of the ILRMA package before
running.
"""
import os

import numpy as np
from oct2py import octave
from scipy import signal

# path for ILRMA
path_to_ILRMA = '/path/to/ILRMA'
octave.addpath(path_to_ILRMA)

# Set parameters
seed = 1  # pseudo random seed
refMic = 1  # reference microphone for back projection (1-based index)
fsResample = 16000  # resampling frequency [Hz]
ns = 2  # number of sources
fftSize = 4096  # window length in STFT [points]
shiftSize = 2048  # shift length in STFT [points]

# number of bases (for type=1, nb is # of bases for "each" source.
# for type=2, nb is # of bases for "all" sources)
nb = 10

# number of iterations (define by checking convergence
# behavior with drawConv=true)
it = 100

# 1 or 2 (1: ILRMA w/o partitioning function,
# 2: ILRMA with partitioning function)
# Named ilrma_type so the Python builtin ``type`` is not shadowed.
ilrma_type = 1

# true or false (true: plot cost function values in each iteration
# and show convergence behavior, false: faster and
# do not plot cost function values)
drawConv = octave.logical(1)

# true or false (true: apply normalization in each iteration
# of ILRMA to improve numerical stability, but the monotonic
# decrease of the cost function may be lost. false: do not apply normalization)
normalize = octave.logical(1)

# Fix random seed
octave.rand('seed', seed)

# Source images; each is indexed below as (sample, channel).
sig_drums, fs_drums = octave.audioread(path_to_ILRMA + '/input/drums.wav',
                                       nout=2)
sig_piano, fs = octave.audioread(path_to_ILRMA + '/input/piano.wav', nout=2)

# A single rate is used to resample both inputs, so they must agree.
if fs_drums != fs:
    raise ValueError(
        'input files have different sampling rates: {} vs {}'.format(
            fs_drums, fs))

# signal x channel x source (source image)
sig = np.stack([sig_drums, sig_piano], axis=2)
del sig_drums, sig_piano, fs_drums

# resampling for reducing computational cost (along the sample axis only)
sig_resample = signal.resample_poly(sig, fsResample, fs, axis=0)

# Observed mixture: per-channel sum of the two source images
# -> signal x channel.
mix = sig_resample[:, :, 0] + sig_resample[:, :, 1]

sep, cost = octave.bss_ILRMA(mix, ns, nb, fftSize, shiftSize, it, ilrma_type,
                             refMic, drawConv, normalize, nout=2)

outputDir = './output'
os.makedirs(outputDir, exist_ok=True)

# refMic is 1-based, hence the "- 1" when indexing the NumPy arrays.
outputs = [
    ('observedMixture', mix),  # observed signal
    ('originalSource1', sig_resample[:, refMic - 1, 0]),  # source signal 1
    ('originalSource2', sig_resample[:, refMic - 1, 1]),  # source signal 2
    ('estimatedSignal1', sep[:, 0]),  # estimated signal 1
    ('estimatedSignal2', sep[:, 1]),  # estimated signal 2
]
for name, data in outputs:
    octave.audiowrite('{}/{}.wav'.format(outputDir, name), data, fsResample)