Last active
May 15, 2023 21:37
-
-
Save w32zhong/d0d85ec7f7a4eae73481688a327a57b5 to your computer and use it in GitHub Desktop.
Bark
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pip install git+https://github.com/suno-ai/bark.git && pip uninstall -y torch torchvision torchaudio && pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu118
# Text to synthesize. The script treats every line of this string as one
# utterance, so keep one sentence (or short group of sentences) per line.
# Stray " | |" table-cell residue has been removed from the line ends: it was
# part of the string literal and therefore part of the text sent to the model.
text_prompt = """
For all neural retrievers, we fine-tune them on top of a further-pretrained backbone using a batched triplets.
It contains a query q, and a pair of positive and negative passages, p plus and p minus.
we use passages of other training instances as additional negatives, which is a common practice to get more training samples for free, basically.
"""
from bark import SAMPLE_RATE, generate_audio, preload_models | |
import numpy as np | |
from bark.api import semantic_to_waveform | |
import soundfile as sf | |
# Synthesize `text_prompt` line by line with Bark, insert a short pause after
# each line, and write the concatenated result to 'download.wav'.

# Load the Bark models once, before the generation loop.
preload_models()

GEN_TEMP = 0.6
SPEAKER = "v2/en_speaker_6"

# One utterance per non-empty prompt line; blank lines have nothing to speak
# and are skipped instead of being sent to the model.
inputs = [line.strip() for line in text_prompt.splitlines() if line.strip()]
if not inputs:
    raise SystemExit("text_prompt contains no text to synthesize")

# A quarter second of silence appended after every utterance.
silence = np.zeros(int(0.25 * SAMPLE_RATE))

pieces = []
for input_line in inputs:
    print('LINE:', input_line)
    audio_array = generate_audio(
        input_line,
        history_prompt=SPEAKER,
        # GEN_TEMP was defined but never used; it is now forwarded as the
        # text-sampling temperature.
        # NOTE(review): assumes generate_audio accepts `text_temp` -- confirm
        # against the installed Bark version.
        text_temp=GEN_TEMP,
    )
    # np.concatenate copies its inputs, so the same silence array can be
    # reused for every gap.
    pieces += [audio_array, silence]

cat = np.concatenate(pieces)
sf.write('download.wav', cat, SAMPLE_RATE, 'PCM_24')
# Optional post-processing (shell commands, not Python):
#   sudo pacman -S twolame sox
#   sox *.wav output/output.wav
#   ffmpeg -i output/output.wav -ab 320k -f mp3 output/output.mp3
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment