Created
June 22, 2024 19:58
-
-
Save thewh1teagle/6d477f91d3f3fb7380b6fb3d839dda2e to your computer and use it in GitHub Desktop.
Text to speech in Hebrew using mms-tts-heb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pip install numpy==1.26.4
# pip install soundfile
# pip install transformers
# pip install
# pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
# Synthesize a Hebrew sentence with the "facebook/mms-tts-heb" checkpoint
# and write the resulting waveform to "audio.wav" in the working directory.
import soundfile as sf
import torch
from transformers import AutoTokenizer, VitsModel

# Load the pretrained model and its matching tokenizer from the same
# checkpoint so the token ids line up with what the model expects.
model = VitsModel.from_pretrained("facebook/mms-tts-heb")
tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-heb")

# Text to be spoken.
text = "שלום עולם! אני מדבר ישירות מהמחשב"

# Tokenize into PyTorch tensors ("pt") ready to be unpacked into the model.
inputs = tokenizer(text, return_tensors="pt")

# Generate the speech waveform; gradients are not needed for inference.
with torch.no_grad():
    output = model(**inputs).waveform

# Drop singleton dimensions (presumably the batch axis -- confirm against the
# model output shape), move to CPU and convert to a NumPy array for soundfile.
output_np = output.squeeze().cpu().numpy()

# Write the WAV file using the sampling rate declared in the model config.
sf.write("audio.wav", output_np, samplerate=model.config.sampling_rate)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment