Skip to content

Instantly share code, notes, and snippets.

@stealthinu
Created October 17, 2022 07:28
Show Gist options
  • Save stealthinu/8d773cce05c81c2134af52176400fdf0 to your computer and use it in GitHub Desktop.
Save stealthinu/8d773cce05c81c2134af52176400fdf0 to your computer and use it in GitHub Desktop.
soft-vc-test
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import torch, torchaudio\n",
"import IPython.display as display\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import torch.nn.functional as nnf"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load the `hubert_soft` and `hubert_discrete` entry points of the\n",
"# bshall/hubert repository (main branch) through torch.hub and move\n",
"# both models to the GPU.\n",
"hubert = torch.hub.load(repo_or_dir=\"bshall/hubert:main\", model=\"hubert_soft\").cuda()\n",
"hubert_discrete = torch.hub.load(repo_or_dir=\"bshall/hubert:main\", model=\"hubert_discrete\").cuda()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sampling rate the clip is converted to right after loading.\n",
"sample_rate = 16000\n",
"source, sr = torchaudio.load(\"dataset/textful/00_myvoice/wav/emotion002.wav\")\n",
"source = torchaudio.functional.resample(source, orig_freq=sr, new_freq=sample_rate)\n",
"\n",
"# SoX effect chain: one [effect, argument] pair per entry.\n",
"effects = [\n",
"    [\"gain\", \"-6.0\"],\n",
"    [\"pitch\", \"+0\"],\n",
"    [\"rate\", str(sample_rate)],\n",
"]\n",
"# Only element 0 of the result is kept; it is used as the processed waveform below.\n",
"source_effected = torchaudio.sox_effects.apply_effects_tensor(source, sample_rate, effects)[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Audio player for the processed clip. The playback rate comes from\n",
"# `sample_rate` (the rate the clip was resampled to) instead of a second\n",
"# hard-coded 16000, so the two cannot drift apart.\n",
"display.Audio(source_effected.squeeze().cpu(), rate=sample_rate)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Encode the original and the processed clip with the soft-unit model.\n",
"with torch.inference_mode():\n",
"    # Earlier experiments, kept for reference:\n",
"    #units = hubert.units(source[:, :, :640+320+320]) # 320*4 -> 4\n",
"    #units = hubert.units(source[:, :, :320]) # at least 320 samples are required\n",
"    # A leading axis is added to each waveform before it is moved to the GPU.\n",
"    units = hubert.units(source[None].cuda())\n",
"    #discrete = hubert_discrete.units(source.unsqueeze(0).cuda())\n",
"    units_effected = hubert.units(source_effected[None].cuda())\n",
"    #discrete_effected = hubert_discrete.units(source_effected.unsqueeze(0).cuda())\n",
"\n",
"# Element-wise difference between the two unit tensors.\n",
"diffs = units - units_effected\n",
"units.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Image of the original clip's units, with axes 1 and 2 swapped for display.\n",
"plt.imshow(units.permute(0, 2, 1).squeeze().cpu().numpy(), cmap=\"bwr\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Image of the processed clip's units, with axes 1 and 2 swapped for display.\n",
"plt.imshow(units_effected.permute(0, 2, 1).squeeze().cpu().numpy(), cmap=\"bwr\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Image of the difference between the two unit tensors, with axes 1 and 2 swapped for display.\n",
"plt.imshow(diffs.permute(0, 2, 1).squeeze().cpu().numpy(), cmap=\"bwr\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Frames per second of the two representations at 16 kHz; 128 and 320 are\n",
"# presumably the spectrogram and HuBERT hop sizes in samples (TODO confirm).\n",
"spec_sampling_rate = 16000 / 128\n",
"hubert_sampling_rate = 16000 / 320\n",
"hubert_to_spec_rate = spec_sampling_rate / hubert_sampling_rate\n",
"\n",
"# interpolate() below resizes the LAST axis of units.transpose(1, 2), which is\n",
"# axis 1 of `units`. The target length therefore has to be scaled from\n",
"# units.size(1); units.size(2) is the axis that interpolate() leaves untouched.\n",
"spec_size = int(units.size(1) * hubert_to_spec_rate)\n",
"print(spec_sampling_rate, hubert_sampling_rate, hubert_to_spec_rate, units.size(1), spec_size)\n",
"\n",
"# mode: 'nearest' | 'linear'\n",
"# NOTE: when switching to 'nearest', drop align_corners; PyTorch accepts it\n",
"# only for the interpolating modes.\n",
"units_fixed = nnf.interpolate(units.transpose(1, 2), size=spec_size, mode=\"linear\", align_corners=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Line plot of index 1 on axis 2 of `units`, taken along axis 1 for batch item 0.\n",
"plt.plot(units[0, :, 1].cpu().numpy())\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Line plot of row 1 of the interpolated units for batch item 0.\n",
"plt.plot(units_fixed[0][1].cpu().numpy())\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.8.13 ('mmvcwsl')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "ff3fdd7d80781b2f52ba823c7a1b36dbda0bf9834fe188ef410bc9fe62fc2ef7"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment