# Gist by @r9y9, created February 27, 2018.
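# Toy example: fit wavenet_vocoder's WaveNet to five synthetic sine tones,
# using the sign of the waveform as a 2-channel local conditioning feature
# and a per-tone "speaker" one-hot as the global conditioning feature.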
import numpy as np
import torch
import torch.nn.functional as F
import wavenet_vocoder
from nnmnkwii import preprocessing as P
from numpy import linspace, sin, pi, int16
from torch.autograd import Variable
sr = 4000  # sampling rate (Hz) of the synthetic tones
# tone synthesis
def note(freq, duration, amp=1, rate=sr):
    t = linspace(0, duration, int(duration * rate))
    data = sin(2 * pi * freq * t) * amp
    return data.astype(int16)
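# e.g. note(440, 2) returns 8000 int16 samples: a 2-second, 440 Hz sine at sr = 4000.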
mu = 256  # number of mu-law quantization channels (= WaveNet output channels)
# Five 2-second tones at different frequencies; each tone plays the role of a
# distinct "speaker".
tone = np.array([note(freq, 2, amp=10000) for freq in (140, 240, 340, 440, 540)])
# Normalize the batch to [-0.95, 0.95], then mu-law quantize to class indices.
tone_n = (tone - tone.min()) / (tone.max() - tone.min()) * 1.9 - 0.95
tone_mu = np.array([P.mulaw_quantize(t, mu) for t in tone_n])
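# For reference, a minimal sketch of what mu-law quantization does (the usual
# companding formula); this is an illustration only, not nnmnkwii's exact
# implementation. For |x| < 1 the resulting indices land in [0, mu).
def mulaw_quantize_sketch(x, mu=256):
    # Compress x (expected in [-1, 1]) logarithmically, then bin to integers.
    y = np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)
    return ((y + 1) / 2 * mu).astype(np.int64)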
speakers = list(range(5))
length = 8000          # samples per tone: 2 s * 4000 Hz
d = 32                 # residual / gate / skip channel width
num_speakers = 5
dim_speaker_embed = 3  # unused below, since use_speaker_embedding=False
wavenet = wavenet_vocoder.WaveNet(
    out_channels=mu,
    kernel_size=4,
    residual_channels=d,
    gate_channels=d,
    skip_out_channels=d,
    cin_channels=2,               # local conditioning: 2-dim one-hot of sign(waveform)
    gin_channels=num_speakers,    # global conditioning: speaker one-hot, used directly
    # n_speakers=num_speakers,
    use_speaker_embedding=False,  # feed the one-hot as-is instead of an embedding lookup
)
B = tone.shape[0] # batch size
opti = torch.optim.Adam(wavenet.parameters(), lr=1e-4)
train_loss = []
X, C, G = [], [], []
for speaker, x in enumerate(tone_mu):
    speaker_one_hot = np.zeros(num_speakers, dtype=np.int64)
    speaker_one_hot[speaker] = 1  # speaker == tone frequency index
    # + or - based on the current amplitude: a mock local conditioning feature
    cond = np.identity(2)[((np.sign(tone[speaker]) + 1) / 2).astype(int)].T
    x = np.identity(mu)[x].T  # one-hot encode the mu-law classes: (mu, length)
    X.append(x)
    C.append(cond)
    G.append(speaker_one_hot)
X = np.array(X, dtype=np.float32)
C = np.array(C, dtype=np.float32)
G = np.array(G, dtype=np.float32)
assert X.shape == (B, mu, length)
assert C.shape == (B, 2, length)
assert G.shape == (B, num_speakers)
x = Variable(torch.from_numpy(X)) # torch.Size([5, 256, 8000])
cond = Variable(torch.from_numpy(C)) # torch.Size([5, 2, 8000])
speaker_one_hot = Variable(torch.from_numpy(G)) # torch.Size([5, 5])
out = wavenet(x, c=cond, g=speaker_one_hot)  # expected shape: (B, mu, length) logits
# Stop here for now; the training step below has not been exercised yet.
import sys
sys.exit(0)
# Reconstruction loss: `out` holds (B, mu, length) logits; cross_entropy wants
# the integer mu-law class indices of shape (B, length) as the target.
target = Variable(torch.from_numpy(tone_mu.astype(np.int64)))
opti.zero_grad()
loss_1_reconst = F.cross_entropy(out, target)
loss_1_reconst.backward()
opti.step()
train_loss.append(loss_1_reconst.item())
print(loss_1_reconst.item())
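# Not reached because of the early exit above, but as a sketch of how the
# model's predictions could be turned back into a waveform: take the argmax
# class per time step and invert the mu-law quantization with nnmnkwii.
pred = out.max(1)[1].data.numpy()  # (B, length) predicted mu-law class indices
wav_hat = np.array([P.inv_mulaw_quantize(p, mu) for p in pred])  # back to [-1, 1]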