# r9y9/a.py

## a.py
import numpy as np
import torch
import torch.nn.functional as F
import wavenet_vocoder
from nnmnkwii import preprocessing as P
from numpy import linspace, sin, pi, int16
from torch.autograd import Variable

sr = 4000  # default number of samples per unit of duration used by note()

# tone synthesis


def note(freq, len, amp=1, rate=sr):
    """Synthesize a sine tone as 16-bit integer samples.

    Args:
        freq: Cycles per unit of time (Hz when ``len`` is in seconds).
        len: Duration of the tone. The name shadows the builtin but is kept
            because callers may pass it by keyword.
        amp: Peak amplitude applied before the cast to ``int16``.
        rate: Samples per unit of duration; defaults to the module-level
            ``sr``.

    Returns:
        A 1-D ``int16`` array holding ``int(round(len * rate))`` samples.
    """
    # linspace needs an integer sample count. A fractional duration such as
    # 0.5 makes ``len * rate`` a float, which recent NumPy versions reject.
    num_samples = int(round(len * rate))
    # linspace includes the end point, so the samples span [0, len] inclusive.
    t = linspace(0, len, num_samples)
    data = sin(2 * pi * freq * t) * amp
    # The cast truncates toward zero; no clipping is applied here.
    return data.astype(int16)


# Quantization level count handed to P.mulaw_quantize below.
mu = 256

# Five tones of duration 2 and amplitude 10000 at frequencies
# 140, 240, 340, 440 and 540, stacked as the rows of one array.
tone = np.array([note(hz, 2, amp=10000) for hz in range(140, 541, 100)])

# Min-max rescale the whole array into [-0.95, 0.95].
tone_n = (tone - tone.min()) / (tone.max() - tone.min()) * 1.9 - 0.95

# Mu-law quantize every rescaled tone.
tone_mu = np.array([P.mulaw_quantize(wave, mu) for wave in tone_n])

# Experiment constants.
num_speakers = 5
speakers = list(range(num_speakers))
dim_speaker_embed = 3  # not referenced by the code visible in this file
length = 8000  # expected samples per tone; asserted against the batch shape
d = 32  # shared width of the residual, gate and skip-out channels
B = tone.shape[0]  # batch size

# Running record of training losses.
train_loss = []

# WaveNet with `mu` output channels, 2 local-conditioning channels and one
# global-conditioning channel per speaker; speaker embedding is disabled.
wavenet = wavenet_vocoder.WaveNet(
    out_channels=mu,
    kernel_size=4,
    residual_channels=d,
    gate_channels=d,
    skip_out_channels=d,
    cin_channels=2,
    gin_channels=num_speakers,
    use_speaker_embedding=False,
)

# Adam over every model parameter.
opti = torch.optim.Adam(wavenet.parameters(), lr=1e-4)

# Assemble the batch arrays, one entry per quantized tone.

# X: one-hot encoding of the quantized samples, transposed so that the `mu`
# classes sit on the first axis of each entry.
X = np.array(
    [np.identity(mu)[codes].T for codes in tone_mu],
    dtype=np.float32,
)

# C: mock local conditioning, a 2-class one-hot that is class 1 where the raw
# sample is positive and class 0 otherwise (+ or - of the current amplitude).
C = np.array(
    [np.identity(2)[(tone[idx] > 0).astype(int)].T
     for idx in range(len(tone_mu))],
    dtype=np.float32,
)

# G: one-hot row identifying the speaker / tone frequency.
G = np.array(
    [np.identity(num_speakers)[idx] for idx in range(len(tone_mu))],
    dtype=np.float32,
)

# Shape sanity checks against the experiment constants.
assert X.shape == (B, mu, length)
assert C.shape == (B, 2, length)
assert G.shape == (B, num_speakers)

# Wrap the NumPy batch arrays as tensors for the model.
x = Variable(torch.from_numpy(X))  # torch.Size([5, 256, 8000])
cond = Variable(torch.from_numpy(C))  # torch.Size([5, 2, 8000])
speaker_one_hot = Variable(torch.from_numpy(G))  # torch.Size([5, 5])

# Call the model object instead of .forward() so that any registered hooks
# run (assumes WaveNet is a torch.nn.Module, as its parameters()/forward()
# usage suggests).
out = wavenet(x=x, c=cond, g=speaker_one_hot)

# for now: stop after the forward-pass smoke test. Everything below is
# unreachable until this exit is removed.
import sys
sys.exit(0)

# NOTE(review): the target here is the float one-hot batch `x` itself, at the
# same time steps as the model input. Confirm that the installed PyTorch
# accepts class-probability targets and that no one-step shift between
# prediction and target is intended before enabling this.
opti.zero_grad()  # start from clean gradients so repeated steps do not accumulate
loss_1_reconst = F.cross_entropy(out, x)
# The graph is not reused after this step, so it does not need to be retained.
loss_1_reconst.backward()
opti.step()
# Record a plain float so the list does not keep the autograd graph alive.
train_loss.append(loss_1_reconst.item())
print(loss_1_reconst)
	import numpy as np
	import torch
	import torch.nn.functional as F
	import wavenet_vocoder
	from nnmnkwii import preprocessing as P
	from numpy import linspace, sin, pi, int16
	from torch.autograd import Variable

	sr = 4000  # default number of samples per unit of duration used by note()

	# tone synthesis


	def note(freq, len, amp=1, rate=sr):
		"""Synthesize a sine tone as 16-bit integer samples.

		Args:
			freq: Cycles per unit of time (Hz when ``len`` is in seconds).
			len: Duration of the tone. The name shadows the builtin but is
				kept because callers may pass it by keyword.
			amp: Peak amplitude applied before the cast to ``int16``.
			rate: Samples per unit of duration; defaults to ``sr``.

		Returns:
			A 1-D ``int16`` array holding ``int(round(len * rate))`` samples.
		"""
		# linspace needs an integer sample count. A fractional duration such
		# as 0.5 makes ``len * rate`` a float, which recent NumPy rejects.
		num_samples = int(round(len * rate))
		# linspace includes the end point, so samples span [0, len] inclusive.
		t = linspace(0, len, num_samples)
		data = sin(2 * pi * freq * t) * amp
		# The cast truncates toward zero; no clipping is applied here.
		return data.astype(int16)


	# Quantization level count handed to P.mulaw_quantize below.
	mu = 256

	# Five tones of duration 2 and amplitude 10000 at frequencies
	# 140, 240, 340, 440 and 540, stacked as the rows of one array.
	tone = np.array([note(hz, 2, amp=10000) for hz in range(140, 541, 100)])

	# Min-max rescale the whole array into [-0.95, 0.95].
	tone_n = (tone - tone.min()) / (tone.max() - tone.min()) * 1.9 - 0.95

	# Mu-law quantize every rescaled tone.
	tone_mu = np.array([P.mulaw_quantize(wave, mu) for wave in tone_n])

	# Experiment constants.
	num_speakers = 5
	speakers = list(range(num_speakers))
	dim_speaker_embed = 3  # not referenced by the code visible in this file
	length = 8000  # expected samples per tone; asserted against the batch shape
	d = 32  # shared width of the residual, gate and skip-out channels
	B = tone.shape[0]  # batch size

	# Running record of training losses.
	train_loss = []

	# WaveNet with `mu` output channels, 2 local-conditioning channels and one
	# global-conditioning channel per speaker; speaker embedding is disabled.
	wavenet = wavenet_vocoder.WaveNet(
		out_channels=mu,
		kernel_size=4,
		residual_channels=d,
		gate_channels=d,
		skip_out_channels=d,
		cin_channels=2,
		gin_channels=num_speakers,
		use_speaker_embedding=False,
	)

	# Adam over every model parameter.
	opti = torch.optim.Adam(wavenet.parameters(), lr=1e-4)

	# Per-tone entries, stacked into batch arrays after the loop.
	X, C, G = [], [], []

	for speaker, x in enumerate(tone_mu):
		# One-hot id of this speaker / tone frequency.
		speaker_one_hot = np.zeros((num_speakers), dtype=np.int64)
		speaker_one_hot[speaker] = 1

		# Mock local conditioning: + or - based on the current amplitude.
		# (sign + 1) / 2 cast to int is 1 for positive samples and 0
		# otherwise; the identity lookup turns that into a 2-class one-hot.
		cond = (np.identity(2)[np.array(
			(np.sign(tone[speaker]) + 1) / 2, dtype=int)]).T

		# One-hot encode the quantized samples, classes on the first axis.
		x = np.identity(mu)[x].T

		X.append(x)
		C.append(cond)
		G.append(speaker_one_hot)

	X = np.array(X, dtype=np.float32)
	C = np.array(C, dtype=np.float32)
	G = np.array(G, dtype=np.float32)

	# Shape sanity checks against the experiment constants.
	assert X.shape == (B, mu, length)
	assert C.shape == (B, 2, length)
	assert G.shape == (B, num_speakers)

	# Wrap the NumPy batch arrays as tensors for the model.
	x = Variable(torch.from_numpy(X))  # torch.Size([5, 256, 8000])
	cond = Variable(torch.from_numpy(C))  # torch.Size([5, 2, 8000])
	speaker_one_hot = Variable(torch.from_numpy(G))  # torch.Size([5, 5])

	# Call the model object instead of .forward() so that any registered hooks
	# run (assumes WaveNet is a torch.nn.Module, as its parameters()/forward()
	# usage suggests).
	out = wavenet(x=x, c=cond, g=speaker_one_hot)

	# for now: stop after the forward-pass smoke test. Everything below is
	# unreachable until this exit is removed.
	import sys
	sys.exit(0)

	# NOTE(review): the target here is the float one-hot batch `x` itself, at
	# the same time steps as the model input. Confirm that the installed
	# PyTorch accepts class-probability targets and that no one-step shift
	# between prediction and target is intended before enabling this.
	opti.zero_grad()  # start from clean gradients so repeated steps do not accumulate
	loss_1_reconst = F.cross_entropy(out, x)
	# The graph is not reused after this step, so it need not be retained.
	loss_1_reconst.backward()
	opti.step()
	# Record a plain float so the list does not keep the autograd graph alive.
	train_loss.append(loss_1_reconst.item())
	print(loss_1_reconst)