# w32zhong/bark.py

## bark.py
# pip install git+https://github.com/suno-ai/bark.git && pip uninstall -y torch torchvision torchaudio && pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu118

# Narration text for the script: one sentence per line. Further down it is
# stripped, split on newlines, and each line is passed to generate_audio.
# The joined value keeps the leading newline and the trailing blank line.
text_prompt = "\n".join((
    "",
    "For all neural retrievers, we fine-tune them on top of a "
    "further-pretrained backbone using a batched triplets.",
    "It contains a query q, and a pair of positive and negative passages, "
    "p plus and p minus.",
    "we use passages of other training instances as additional negatives, "
    "which is a common practice to get more training samples for free, "
    "basically.",
    "",
    "",
))

from bark import SAMPLE_RATE, generate_audio, preload_models
import numpy as np
from bark.api import semantic_to_waveform
import soundfile as sf

# Load the Bark models once, before any generation call.
preload_models()

# Synthesize the prompt one line at a time. Lines that are blank after
# trimming are dropped so no empty text is sent to the model.
inputs = [line.strip() for line in text_prompt.splitlines() if line.strip()]
if not inputs:
    # np.concatenate below cannot join an empty list; fail with a clear message.
    raise ValueError("text_prompt contains no text to synthesize")

# A quarter second of zero samples, appended after every generated line.
silence = np.zeros(int(0.25 * SAMPLE_RATE))
GEN_TEMP = 0.6  # sampling temperature handed to generate_audio as text_temp
SPEAKER = "v2/en_speaker_6"  # value handed to generate_audio as history_prompt

pieces = []
for input_line in inputs:
    print('LINE:', input_line)
    audio_array = generate_audio(
        input_line,
        history_prompt=SPEAKER,
        # Previously GEN_TEMP was defined but never used, so the library
        # default temperature applied instead of the configured one.
        text_temp=GEN_TEMP,
    )
    pieces += [audio_array, silence.copy()]
cat = np.concatenate(pieces)

# Write the joined waveform to disk with the 'PCM_24' subtype.
sf.write('download.wav', cat, SAMPLE_RATE, 'PCM_24')

#sudo pacman -S twolame sox
#sox *.wav output/output.wav
#ffmpeg -i output/output.wav -ab 320k -f mp3 output/output.mp3
	# pip install git+https://github.com/suno-ai/bark.git && pip uninstall -y torch torchvision torchaudio && pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu118

# NOTE(review): this section repeats the script that precedes it in this
# file. Its statements were all tab-indented (an IndentationError at module
# level) and the loop body was not nested under the `for`; the layout is
# restored here. Running the file now synthesizes the prompt twice and
# overwrites download.wav -- consider removing one of the two copies.

# Narration text: one sentence per line (stray leading tabs removed).
text_prompt = """
For all neural retrievers, we fine-tune them on top of a further-pretrained backbone using a batched triplets.
It contains a query q, and a pair of positive and negative passages, p plus and p minus.
we use passages of other training instances as additional negatives, which is a common practice to get more training samples for free, basically.

"""

from bark import SAMPLE_RATE, generate_audio, preload_models
import numpy as np
from bark.api import semantic_to_waveform
import soundfile as sf

# Load the Bark models once, before any generation call.
preload_models()

# Synthesize the prompt one line at a time. Lines that are blank after
# trimming are dropped so no empty text is sent to the model.
inputs = [line.strip() for line in text_prompt.splitlines() if line.strip()]
if not inputs:
    # np.concatenate below cannot join an empty list; fail with a clear message.
    raise ValueError("text_prompt contains no text to synthesize")

# A quarter second of zero samples, appended after every generated line.
silence = np.zeros(int(0.25 * SAMPLE_RATE))
GEN_TEMP = 0.6  # sampling temperature handed to generate_audio as text_temp
SPEAKER = "v2/en_speaker_6"  # value handed to generate_audio as history_prompt

pieces = []
for input_line in inputs:
    print('LINE:', input_line)
    audio_array = generate_audio(
        input_line,
        history_prompt=SPEAKER,
        # GEN_TEMP was previously defined but never used, so the library
        # default temperature applied instead of the configured one.
        text_temp=GEN_TEMP,
    )
    pieces += [audio_array, silence.copy()]
cat = np.concatenate(pieces)

# Write the joined waveform to disk with the 'PCM_24' subtype.
sf.write('download.wav', cat, SAMPLE_RATE, 'PCM_24')

#sudo pacman -S twolame sox
#sox *.wav output/output.wav
#ffmpeg -i output/output.wav -ab 320k -f mp3 output/output.mp3