thewh1teagle/main.py

## main.py
# pip install numpy==1.26.4
# pip install soundfile
# pip install transformers
# pip install
# pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

from transformers import VitsModel, AutoTokenizer
import torch
import soundfile as sf

# Checkpoint identifier shared by the model and its tokenizer.
MODEL_ID = "facebook/mms-tts-heb"

# Fetch the pretrained weights and the tokenizer for that checkpoint.
model = VitsModel.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Sentence that will be turned into speech.
text = "שלום עולם! אני מדבר ישירות מהمחשב"

# Encode the sentence, asking the tokenizer for PyTorch tensors ("pt").
inputs = tokenizer(text, return_tensors="pt")

# Run the forward pass with gradient tracking switched off; this script only
# performs inference, so no autograd graph is needed.
with torch.no_grad():
    synthesis = model(**inputs)
output = synthesis.waveform

# Drop size-1 dimensions (presumably the batch axis -- confirm against the
# model's output shape), make sure the data is on the CPU, and hand it to numpy.
waveform = output.squeeze()
output_np = waveform.cpu().numpy()

# Write the samples to disk at the sampling rate declared in the model config.
sample_rate = model.config.sampling_rate
sf.write("audio.wav", output_np, samplerate=sample_rate)
	# pip install numpy==1.26.4
	# pip install soundfile
	# pip install transformers
	# pip install
	# pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

	from transformers import VitsModel, AutoTokenizer
	import torch
	import soundfile as sf

	# Load the pretrained model and the tokenizer from the same checkpoint.
	model = VitsModel.from_pretrained("facebook/mms-tts-heb")
	tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-heb")

	# Text that will be synthesized into speech.
	text = "שלום עולם! אני מדבר ישירות מהמחשב"

	# Tokenize the text, requesting PyTorch tensors ("pt") as the model input.
	inputs = tokenizer(text, return_tensors="pt")

	# Generate the speech waveform. The model call must be indented under the
	# `with` statement so it actually runs with gradient tracking disabled.
	with torch.no_grad():
		output = model(**inputs).waveform

	# Remove size-1 dimensions, ensure the tensor is on the CPU, and convert it
	# to a numpy array that soundfile can write.
	output_np = output.squeeze().cpu().numpy()

	# Save the waveform to a WAV file at the sampling rate from the model config.
	sf.write("audio.wav", output_np, samplerate=model.config.sampling_rate)