Skip to content

Instantly share code, notes, and snippets.

@rubyu
Last active November 20, 2021 17:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rubyu/da3bbef0febb1a31dd23b5f2fdcc5181 to your computer and use it in GitHub Desktop.
Save rubyu/da3bbef0febb1a31dd23b5f2fdcc5181 to your computer and use it in GitHub Desktop.
import os
import csv
import math
import shutil
import random
from hashlib import md5
from google.cloud import texttospeech
# Tell the Google Cloud client library which service-account key file to use.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'xxxx.json'

# Voice selections handed to the Text-to-Speech API: three English accents
# and one Japanese voice.
voice_en_gb = texttospeech.VoiceSelectionParams(language_code='en-GB', name='en-GB-Wavenet-F')
voice_en_us = texttospeech.VoiceSelectionParams(language_code='en-US', name='en-US-Wavenet-H')
voice_en_india = texttospeech.VoiceSelectionParams(language_code='en-IN', name='en-IN-Wavenet-D')
voice_ja = texttospeech.VoiceSelectionParams(language_code='ja-JP', name='ja-JP-Wavenet-B')
# Shared Text-to-Speech client, created lazily on the first synthesis request.
_tts_client = None


def _get_tts_client():
    """Return the shared Text-to-Speech client, creating it on first use."""
    global _tts_client
    if _tts_client is None:
        _tts_client = texttospeech.TextToSpeechClient()
    return _tts_client


def get_tts_bytes(text, voice):
    """Synthesize *text* with *voice* and return the encoded MP3 audio.

    Args:
        text: The plain text to speak.
        voice: A ``texttospeech.VoiceSelectionParams`` choosing the voice.

    Returns:
        The ``audio_content`` of the synthesis response (MP3-encoded audio).
    """
    # Reuse one client for the whole run instead of constructing a new one
    # for every sentence.
    response = _get_tts_client().synthesize_speech(
        input=texttospeech.SynthesisInput(text=text),
        voice=voice,
        audio_config=texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3,
        ),
    )
    return response.audio_content
def fill_cache(text, cache_path, model_candidates):
    """Make sure synthesized audio for *text* exists at *cache_path*.

    If the cache file already exists nothing happens. Otherwise a voice is
    chosen at random from *model_candidates*, the text is synthesized, and
    the audio bytes are stored at *cache_path*.

    Args:
        text: The text to synthesize.
        cache_path: Destination file for the cached audio.
        model_candidates: Non-empty sequence of voices to pick from.
    """
    if os.path.exists(cache_path):
        return

    voice = random.choice(model_candidates)
    audio = get_tts_bytes(text, voice)

    # Write to a temporary sibling first and rename it into place, so an
    # interrupted run can never leave a truncated file that later runs
    # would mistake for a valid cache entry.
    tmp_path = f'{cache_path}.tmp'
    with open(tmp_path, 'wb') as out:
        out.write(audio)
    os.replace(tmp_path, cache_path)
def to_hash(text):
    """Return the hexadecimal MD5 digest of *text* encoded as UTF-8."""
    encoded = text.encode('utf-8')
    digest = md5(encoded)
    return digest.hexdigest()
from mutagen.mp3 import MP3
from mutagen.id3 import ID3, Encoding, TIT2, TALB, TRCK, TPE1, USLT
def lyric_info(lyric, lang):
    """Build a UTF-8 encoded USLT frame holding *lyric* tagged with *lang*."""
    frame_fields = {'encoding': Encoding.UTF8, 'lang': lang, 'text': lyric}
    return USLT(**frame_fields)
def create_tags(title, lyric, album, artist, track):
    """Assemble an ID3 tag set for one audio file.

    The title, album, artist and track values become UTF-8 text frames;
    *lyric* is an already-built frame and is added as-is.
    """
    text_frames = (
        (TIT2, title),
        (TALB, album),
        (TPE1, artist),
        (TRCK, track),
    )
    id3 = ID3()
    for frame_cls, value in text_frames:
        id3.add(frame_cls(encoding=Encoding.UTF8, text=value))
    id3.add(lyric)
    return id3
def apply_tags(mp3_path, tags):
    """Replace the tags of the MP3 file at *mp3_path* with *tags* and save it."""
    audio = MP3(mp3_path)
    audio.tags = tags
    audio.save()
def main():
    """Turn a two-column CSV of English/Japanese sentences into tagged MP3s.

    For every CSV row the English and Japanese texts are synthesized (or
    taken from the on-disk cache, keyed by the MD5 of the text), copied to
    the output directory as ``[NN]_0.mp3`` / ``[NN]_1.mp3`` and tagged with
    title, album, artist, track number and lyrics.

    Raises:
        ValueError: If a non-blank CSV row does not have exactly two columns.
    """
    cache_path = '.cache'
    out_path = 'out'
    csv_path = 'xxxx.csv'
    album = 'xxxx'
    artist = 'xxxx'
    en_voices = [voice_en_gb, voice_en_us, voice_en_india]
    ja_voices = [voice_ja]

    os.makedirs(cache_path, exist_ok=True)
    os.makedirs(out_path, exist_ok=True)

    # newline='' is what the csv module expects, so that line breaks inside
    # quoted fields are parsed correctly.
    with open(csv_path, encoding='utf_8', newline='') as file:
        # Blank lines come back as empty rows; drop them instead of
        # crashing on the two-value unpack below.
        rows = [row for row in csv.reader(file) if row]

    # Same width as int(log10(n)) + 1 for n >= 1, but also defined when the
    # CSV is empty (log10(0) raises ValueError).
    max_digits = max(2, len(str(len(rows))))

    for i, row in enumerate(rows):
        if len(row) != 2:
            raise ValueError(
                f'{csv_path}: entry {i} has {len(row)} columns, expected 2'
            )
        en_str, ja_str = row
        en_cache_file = os.path.join(cache_path, to_hash(en_str))
        ja_cache_file = os.path.join(cache_path, to_hash(ja_str))

        # prepare caches
        fill_cache(en_str, en_cache_file, model_candidates=en_voices)
        fill_cache(ja_str, ja_cache_file, model_candidates=ja_voices)

        # write mp3 files
        idx = str(i).zfill(max_digits)
        out_file_0 = os.path.join(out_path, f'[{idx}]_0.mp3')
        out_file_1 = os.path.join(out_path, f'[{idx}]_1.mp3')
        shutil.copy(en_cache_file, out_file_0)
        shutil.copy(ja_cache_file, out_file_1)

        # set tags; track numbers interleave English (odd) and Japanese (even)
        apply_tags(out_file_0, create_tags(en_str, lyric_info(en_str, 'eng'), album, artist, str(i*2+1)))
        apply_tags(out_file_1, create_tags(ja_str, lyric_info(ja_str, 'jpn'), album, artist, str(i*2+2)))

        # progress indicator every 100 rows
        if i % 100 == 0:
            print(i)
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment