Skip to content

Instantly share code, notes, and snippets.

@enginebai
Created November 13, 2023 02:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save enginebai/56e69de182b2849dcde1c8277bef6a31 to your computer and use it in GitHub Desktop.
Save enginebai/56e69de182b2849dcde1c8277bef6a31 to your computer and use it in GitHub Desktop.
import argparse
import os
import shlex
import subprocess

import nltk
import numpy as np
from bark import SAMPLE_RATE, generate_audio
from bark.generation import (
    preload_models
)
from scipy.io.wavfile import write as write_wav
# Set environment variables intended to speed up generation.
# NOTE(review): these assignments run after `bark` has already been imported
# above; if bark reads them at import time they will have no effect here —
# confirm, and move them before the bark imports if so.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["SUNO_OFFLOAD_CPU"] = "1"
# Optional switch, disabled by default:
# os.environ["SUNO_USE_SMALL_MODELS"] = "1"
# Speaker passed to generate_audio() as history_prompt when the caller gives none.
default_speaker = "v2/en_speaker_6"
# Fallback script used when the caller gives none; currently just a newline.
test_script = """
"""
# Extension given to the converted file produced by convert_to_mp4().
audio_file_extension = ".mp4"
def __generate_voice(script, speaker):
    """Synthesize `script` sentence by sentence and return one audio array.

    Newlines in the script are replaced by spaces, the text is split into
    sentences with NLTK, each sentence is passed to `generate_audio`, and the
    results are joined with a quarter second of silence after every sentence.

    Args:
        script: Text to synthesize.
        speaker: Value forwarded to `generate_audio` as `history_prompt`.

    Returns:
        A single NumPy array holding the concatenated audio.

    Raises:
        ValueError: If the script contains no sentences (for example it is
            empty or whitespace only).
    """
    sentences = nltk.sent_tokenize(script.replace("\n", " ").strip())
    if not sentences:
        # Without this guard np.concatenate([]) fails with an unhelpful
        # "need at least one array to concatenate" message.
        raise ValueError("script contains no sentences to synthesize")

    # Quarter second of silence appended after every sentence.
    silence = np.zeros(int(0.25 * SAMPLE_RATE))
    pieces = []
    for sentence in sentences:
        print(f'\nGenerating audio for: "{sentence}"')
        pieces.append(generate_audio(sentence, history_prompt=speaker))
        # np.concatenate copies its inputs, so the same array can be reused.
        pieces.append(silence)
    return np.concatenate(pieces)
def _default_output_stem(script, max_length=35):
    """Build a filesystem-safe file name stem from the start of `script`."""
    # Collapse all whitespace runs (including newlines) before truncating.
    head = " ".join(script.split())[:max_length]
    # Replace anything that is not alphanumeric, space, dash or underscore so
    # the name cannot contain path separators or shell-unfriendly characters.
    cleaned = "".join(
        ch if ch.isalnum() or ch in " -_" else "_" for ch in head
    ).strip(" _")
    return cleaned if cleaned else "output"


def english_gpt(script, speaker, output_file):
    """Generate speech for `script` and write it to a WAV file.

    Args:
        script: Text to synthesize; `test_script` is used when falsy.
        speaker: Speaker identifier; `default_speaker` is used when falsy.
        output_file: Destination path. When falsy, a name is derived from the
            first characters of the script that is actually synthesized
            (falling back to "output"), with a ".wav" extension.

    Returns:
        The path of the WAV file that was written.
    """
    script = script if script else test_script
    speaker = speaker if speaker else default_speaker
    if not output_file:
        # Name the file after the text being spoken, not after test_script.
        output_file = f'{_default_output_stem(script)}.wav'

    # Download the models and sample data
    nltk.download('punkt')
    preload_models()
    audio_array = __generate_voice(script, speaker)
    write_wav(output_file, SAMPLE_RATE, audio_array)
    return output_file
def convert_to_mp4(wav_file, mp4_file):
    """Convert `wav_file` to `mp4_file` by invoking ffmpeg.

    The command is passed to ffmpeg as an argument list without a shell, so
    file names containing quotes, spaces or other shell metacharacters are
    handled literally. The command is printed before it runs. ffmpeg's exit
    status is not checked, and a missing ffmpeg executable is reported rather
    than raised.

    Args:
        wav_file: Path of the input audio file.
        mp4_file: Path of the output file.
    """
    command = [
        "ffmpeg",
        "-i", wav_file,
        "-c:v", "libx264",
        "-c:a", "aac",
        "-strict", "-2",
        mp4_file,
    ]
    # Show a copy-pasteable, correctly quoted form of the command.
    print(" ".join(shlex.quote(part) for part in command))
    try:
        subprocess.run(command, check=False)
    except FileNotFoundError:
        # os.system() never raised when ffmpeg was absent; keep it non-fatal.
        print("ffmpeg executable not found; skipping MP4 conversion")
def start_cli(argv=None):
    """Parse command-line options, generate the audio and convert it.

    The script text comes from the file named by --input when given,
    otherwise from --script. The generated WAV path is printed and the file
    is then converted to a sibling file with `audio_file_extension`.

    Args:
        argv: Optional list of argument strings; when None, argparse reads
            the process arguments.
    """
    parser = argparse.ArgumentParser(description='Generate audio from text')
    parser.add_argument('--script', help='Script to generate audio from')
    parser.add_argument('--input', help='Input file with script')
    parser.add_argument('--speaker', help='Speaker to use')
    parser.add_argument('--output', help='Output file')
    args = parser.parse_args(argv)

    if args.input:
        # Read as UTF-8 explicitly instead of the locale-dependent default.
        with open(args.input, 'r', encoding='utf-8') as file:
            my_script = file.read()
    else:
        my_script = args.script

    generated_audio = english_gpt(my_script, args.speaker, args.output)
    print(generated_audio)
    stem, _ = os.path.splitext(generated_audio)
    convert_to_mp4(generated_audio, stem + audio_file_extension)
# Run the command-line interface when this file is executed.
# NOTE(review): this call is unguarded, so it also runs whenever the module is
# imported; consider wrapping it in `if __name__ == "__main__":`.
start_cli()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment