Skip to content

Instantly share code, notes, and snippets.

@Filarius
Last active April 21, 2022 21:32
Show Gist options
  • Save Filarius/0485510c240a6af5ac0c46fd3cf6bed4 to your computer and use it in GitHub Desktop.
Save Filarius/0485510c240a6af5ac0c46fd3cf6bed4 to your computer and use it in GitHub Desktop.
TTS Silero-v4
import argparse
import sys
import num2words.base
import transliterate
# Latin letters that have to be transliterated to Cyrillic before synthesis.
_LATIN_LETTERS = frozenset(
    'abcdefghijklmnopqrstuvwxyz'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
)


def _replace_runs(word, is_member, convert):
    """Return *word* with every maximal run of matching characters converted.

    Characters for which ``is_member(ch)`` is true are collected into runs;
    each run is replaced by ``convert(run)``. All other characters are copied
    through unchanged. ``convert`` is never called when nothing matches.
    """
    pieces = []
    run = ''
    for ch in word:
        if is_member(ch):
            run += ch
            continue
        if run:
            pieces.append(convert(run))
            run = ''
        pieces.append(ch)
    if run:
        pieces.append(convert(run))
    return ''.join(pieces)


def _keep_supported(word, tokens):
    """Drop (and report on stdout) every character of *word* not in *tokens*."""
    kept = []
    for ch in word:
        if ch in tokens:
            kept.append(ch)
        else:
            print("wrong char:", ord(ch), " ", ch)
    return ''.join(kept)


def _pack_chunks(words, limit):
    """Greedily join *words* into space-separated chunks below *limit*.

    The size estimate is the original heuristic: length of the candidate word
    plus the lengths of all pending words, plus the number of pending words,
    plus one more for every word added during the current pass (so separators
    are deliberately over-counted).

    When a pass added words and the pending list contains a word ending in
    '.', the chunk is cut after the last such word and the remainder is
    carried over to the next chunk, so chunks end on a sentence boundary where
    possible.

    Precondition: every word is non-empty and shorter than *limit*; otherwise
    a word could never be placed.
    """
    chunks = []
    pending = []
    i = 0
    while i < len(words):
        carried_len = sum(len(w) for w in pending)
        added_len = 0
        added = False
        while (i < len(words)
               and len(words[i]) + added_len + len(pending) + carried_len < limit):
            added_len += len(words[i]) + 1
            pending.append(words[i])
            added = True
            i += 1

        # Number of trailing words that come after the last sentence end.
        tail = 0
        for w in reversed(pending):
            if w.endswith('.'):
                break
            tail += 1

        if added and 0 < tail < len(pending):
            chunk, pending = pending[:-tail], pending[-tail:]
        else:
            # Nothing fitted, no full stop at all, or the chunk already ends
            # on a full stop: emit everything that is pending.
            chunk, pending = pending, []
        chunks.append(' '.join(chunk))

    # Words carried over from the final pass must not be lost.
    if pending:
        chunks.append(' '.join(pending))
    return chunks


def process_text(filename, tokens, max_chars=900):
    """Read a UTF-8 text file and turn it into chunks ready for synthesis.

    Steps:
      1. Lower-case the text, normalise ellipses, spell out '+', and turn line
         breaks into standalone '.' separators.
      2. Split on single spaces; in every word spell out digit runs with
         ``num2words`` (Russian) and transliterate Latin-letter runs to
         Cyrillic with ``transliterate``.
      3. Remove characters that are not contained in *tokens*, printing a
         "wrong char" notice for each one.
      4. Pack the words into chunks whose estimated size stays below
         *max_chars*, preferring to cut after a word that ends in '.'.

    Args:
        filename: path of the text file to read.
        tokens: container of characters the model accepts (tested with ``in``).
        max_chars: exclusive upper bound used by the chunk size estimate.
            # NOTE(review): 900 is presumably chosen to stay under the TTS
            # model's input limit - confirm.

    Returns:
        list[str]: the chunks, in reading order.

    Raises:
        ValueError: if *max_chars* is smaller than 2.
        OSError: if the file cannot be opened or read.
    """
    if max_chars < 2:
        raise ValueError("max_chars must be at least 2")

    with open(filename, 'r', encoding="utf-8") as f:
        txt: str = f.read()

    txt = txt.lower()
    txt = txt.replace('…', '.')
    txt = txt.replace('...', '. ')
    txt = txt.replace('+', ' плюс ')
    txt = txt.replace('.\n', ' . ')
    txt = txt.replace('\n', ' . ')
    txt = txt.replace('\r', ' . ')
    # NOTE(review): as written this replaces a space with a space (no-op); the
    # first argument was probably a double or non-breaking space - confirm.
    txt = txt.replace(' ', ' ')

    def spell_number(run):
        # Convert a run of digits to Russian words.
        return num2words.num2words(run, lang='ru')

    def to_cyrillic(run):
        # Transliterate a run of Latin letters to Cyrillic.
        return transliterate.translit(run, 'ru')

    # The longest piece that still fits into an empty chunk.
    max_piece = max_chars - 1

    words = []
    for raw in txt.split(' '):
        if not raw:
            continue
        word = _replace_runs(raw, str.isdigit, spell_number)
        word = _replace_runs(word, _LATIN_LETTERS.__contains__, to_cyrillic)
        word = _keep_supported(word, tokens)
        # Slicing drops words that became empty after filtering and hard-splits
        # words that could never fit into a chunk on their own.
        words.extend(word[k:k + max_piece]
                     for k in range(0, len(word), max_piece))

    return _pack_chunks(words, max_chars)
import subprocess,io
def writeffmpeg(name,data):
    """Encode one raw audio buffer to ``<name>.mp3`` through ffmpeg.

    The buffer is sent to ffmpeg's stdin and declared to it as mono
    32-bit float little-endian samples at 16000 Hz.

    Args:
        name: output file name without extension (converted with ``str``).
        data: object providing ``tobytes()`` that returns the raw samples.
    """
    target = str(name) + '.mp3'
    ffmpeg_args = [
        'ffmpeg', '-y',
        '-f', 'f32le', '-ar', '16000', '-ac', '1', '-i', '-',
        target,
    ]
    encoder = subprocess.Popen(ffmpeg_args, stdin=subprocess.PIPE)
    pipe = encoder.stdin
    pipe.write(data.tobytes())
    # Closing stdin signals end of input so ffmpeg can finish the file.
    pipe.close()
    encoder.wait()
def writeffmpegrawlist(name, datalist, pathdir, pathfile):
    """Encode several raw audio buffers into a single 128k MP3 via ffmpeg.

    The buffers are written back to back to ffmpeg's stdin and declared to it
    as mono 32-bit float little-endian samples at the module-level
    ``sample_rate``.

    The output file is ``<pathdir>/<book file name> <name>.mp3``. Only the
    file-name part of *pathfile* is used, and the path is joined with the
    platform separator, so the function also works when the book is given
    with a directory and on systems that do not use a backslash separator.

    Args:
        name: part number appended to the file name (converted with ``str``).
        datalist: iterable of objects providing ``tobytes()``.
        pathdir: directory that receives the MP3 file.
        pathfile: path or name of the source book; its base name prefixes
            the output file name.
    """
    target = os.path.join(
        pathdir,
        os.path.basename(pathfile) + ' ' + str(name) + '.mp3',
    )
    command = [
        'ffmpeg', '-y',
        '-f', 'f32le', '-ar', str(sample_rate), '-ac', '1', '-i', '-',
        '-b:a', '128k',
        target,
    ]
    process = subprocess.Popen(command, stdin=subprocess.PIPE)
    for data in datalist:
        process.stdin.write(data.tobytes())
    # Closing stdin signals end of input so ffmpeg can finish the file.
    process.stdin.close()
    process.wait()
def writeffmpegmp3(name,datalist):
    """Re-encode a sequence of in-memory audio files into one ``<name>.mp3``.

    Every item of *datalist* is decoded by its own ffmpeg process to mono
    signed 16-bit little-endian PCM at 22050 Hz; the decoded bytes are
    streamed, in order, into a single ffmpeg encoder process that writes the
    MP3 file.

    Args:
        name: output file name without extension (converted with ``str``).
        datalist: iterable of objects providing ``getvalue()`` that returns
            the bytes of an audio file ffmpeg can read from stdin.
    """
    commandraw = ['ffmpeg', '-hide_banner', '-loglevel', 'fatal', '-y', '-i', '-',
                  '-acodec', 'pcm_s16le', '-ac', '1', '-f', 's16le', '-ar', '22050','-']
    commandmp3 = ['ffmpeg', '-y','-hide_banner', '-loglevel', 'fatal','-y',
                  '-f', 's16le', '-ar', '22050', '-ac', '1','-i','-',
                  str(name) + '.mp3']
    # The context manager closes the encoder's stdin and reaps the process
    # even when one of the decoding steps raises.
    with subprocess.Popen(commandmp3, stdin=subprocess.PIPE) as processmp3:
        for data in datalist:
            processraw = subprocess.Popen(commandraw, stdin=subprocess.PIPE,
                                          stdout=subprocess.PIPE)
            # communicate() feeds the input, collects stdout and waits for the
            # decoder to exit, so no separate wait() is needed.
            rawdata, _ = processraw.communicate(data.getvalue())
            print('wait raw')
            processmp3.stdin.write(rawdata)
        processmp3.stdin.close()
        print('wait mp3')
        processmp3.wait()
import os
import torch
# Leftover sample argument string; it is not passed to the parser.
ars = "filename -h 48000 -s kseniya -b 1.txt".split(" ")

# ---------------------------------------------------------------------------
# Command line
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser("Генератор аудиокниг (рус)",description="Генератор аудиокниг с использованием портабельной нейросети. Автор https://github.com/Filarius Сама нейросеть и большая благодарность https://github.com/snakers4/silero-models")
# type=int lets argparse reject non-numeric values with a clear message.
parser.add_argument("--threads",help="Количество потоков обработки (4)",type=int,default=4)
parser.add_argument("--gpu",help="Включить использование GPU (не тестировалось)",action='store_true',default=False)
parser.add_argument("--hz",help="Выбери Hz 8000 24000 48000",type=int,default=48000)
parser.add_argument("--speaker",help="Выбери диктора: aidar baya kseniya xenia",default='kseniya')
# The book is mandatory: without it the script used to fail only after the
# model had been downloaded and loaded.
parser.add_argument("--book",help="Путь к книге в текстовом формате",required=True)
parser.add_argument("--output",help="Путь к папке для сохранения аудиокниги")
parser.add_argument("--accent",help="Включить авто-ударение",action='store_true',default=False)
parser.add_argument("--yo",help="Включить автоматическую простановку Ё",action='store_true',default=False)
#parser.add_argument("--bruteforce",help="Режим перебора голосов",action='store_true',default=False)
args = parser.parse_args()

# sample_rate is also read as a global by writeffmpegrawlist.
sample_rate = int(args.hz)
speaker = args.speaker
put_accent = args.accent
put_yo = args.yo
pathin = args.book
pathout = args.output or "output"
os.makedirs(pathout, exist_ok=True)
threads = int(args.threads)

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
if args.gpu:  # TODO: GPU mode is untested
    # 'cuda' is the torch device type for NVIDIA GPUs; 'gpu' is rejected.
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
torch.set_num_threads(threads)

# Download the model once and reuse the local copy on later runs.
local_file = 'model.data'
if not os.path.isfile(local_file):
    torch.hub.download_url_to_file('https://models.silero.ai/models/tts/ru/ru_v3.pt',
                                   local_file)
model = torch.package.PackageImporter(local_file).load_pickle("tts_models", "model")
model.to(device)

# Disabled "bruteforce" mode (random voice search), kept as an inert string.
'''
if args.bruteforce:
c = 1
speaker = 'random'
example_text = 'В недрах тундры выдры в г+етрах т+ырят в вёдра ядра к+едров.'
while True:
audio = model.apply_tts(text=example_text,
speaker=speaker,
sample_rate=sample_rate)
audio = audio.numpy()
writeffmpegrawlist(c + 10000, [audio], pathout, "random " + pathin)
voice_path = pathout + "\\random " + str(c + 10000)+'.data'
model.save_random_voice(voice_path)
c += 1
'''
speakers = ['aidar', 'baya', 'kseniya', 'xenia', 'random']

# ---------------------------------------------------------------------------
# Synthesis
# ---------------------------------------------------------------------------
tokens = model.symbols
lines = process_text(pathin, tokens)

audios = []
counter = 1
for part, line in enumerate(lines, start=1):
    print("part:",part," of ",len(lines)," length:",len(line))
    print(line)
    audio = model.apply_tts(text=line,
                            speaker=speaker,
                            sample_rate=sample_rate,
                            put_accent=put_accent,
                            put_yo=put_yo)
    # .cpu() is a no-op on CPU and is required before .numpy() when the
    # result lives on a CUDA device.
    audios.append(audio.cpu().numpy())
    # Every 50 parts are written out as one numbered MP3 file.
    if part % 50 == 0:
        writeffmpegrawlist(counter+10000, audios, pathout, pathin)
        counter += 1
        audios = []

# Write whatever is left after the last full group of 50 parts.
if audios:
    writeffmpegrawlist(counter + 10000, audios, pathout, pathin)
print("I'm DONE !")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment