Last active
November 20, 2021 17:13
-
-
Save rubyu/da3bbef0febb1a31dd23b5f2fdcc5181 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import math
import os
import random
import shutil
from hashlib import md5

from google.cloud import texttospeech
from mutagen.id3 import ID3, Encoding, TIT2, TALB, TRCK, TPE1, USLT
from mutagen.mp3 import MP3
# Service-account key file for the Google Cloud client library. It is set at
# import time so it is already in place when get_tts_bytes() creates a client.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'xxxx.json'

# Voice presets. main() offers the three English voices as candidates for the
# English column and the Japanese voice for the Japanese column.
voice_en_gb = texttospeech.VoiceSelectionParams(
    name='en-GB-Wavenet-F',
    language_code='en-GB',
)
voice_en_us = texttospeech.VoiceSelectionParams(
    name='en-US-Wavenet-H',
    language_code='en-US',
)
voice_en_india = texttospeech.VoiceSelectionParams(
    name='en-IN-Wavenet-D',
    language_code='en-IN',
)
voice_ja = texttospeech.VoiceSelectionParams(
    name='ja-JP-Wavenet-B',
    language_code='ja-JP',
)
def get_tts_bytes(text, voice, client=None):
    """Synthesize *text* with Google Cloud Text-to-Speech and return MP3 bytes.

    Args:
        text: The plain text to speak.
        voice: A ``texttospeech.VoiceSelectionParams`` selecting the voice.
        client: Optional ``texttospeech.TextToSpeechClient`` to use. When
            omitted, a single client is created on first use and reused by
            later calls instead of building a new client for every request.

    Returns:
        The ``audio_content`` of the synthesis response (MP3-encoded audio).
    """
    if client is None:
        # Lazily create one shared client; it is cached on the function
        # object so no extra module-level state is needed.
        client = getattr(get_tts_bytes, '_client', None)
        if client is None:
            client = texttospeech.TextToSpeechClient()
            get_tts_bytes._client = client

    synthesis_input = texttospeech.SynthesisInput(text=text)
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3
    )
    response = client.synthesize_speech(
        input=synthesis_input, voice=voice, audio_config=audio_config
    )
    return response.audio_content
def fill_cache(text, cache_path, model_candidates):
    """Ensure synthesized audio for *text* exists at *cache_path*.

    If *cache_path* already exists nothing happens. Otherwise a voice is
    picked at random from *model_candidates*, the text is synthesized via
    ``get_tts_bytes`` and the resulting bytes are stored at *cache_path*.

    Args:
        text: The text to synthesize on a cache miss.
        cache_path: Path of the cache file to create.
        model_candidates: Non-empty sequence of voices to choose from on a
            cache miss (``random.choice`` raises ``IndexError`` if empty).
    """
    if os.path.exists(cache_path):
        return

    voice = random.choice(model_candidates)
    audio = get_tts_bytes(text, voice)

    # Write to a sibling temp file and rename it into place, so an
    # interrupted write can never leave a truncated file that a later run
    # would mistake for a valid cache entry.
    tmp_path = f'{cache_path}.tmp'
    with open(tmp_path, 'wb') as out:
        out.write(audio)
    os.replace(tmp_path, cache_path)
def to_hash(text):
    """Return the hexadecimal MD5 digest of *text* encoded as UTF-8.

    main() uses the digest as the cache file name for a piece of text.
    """
    return md5(text.encode(encoding='utf-8')).hexdigest()
def lyric_info(lyric, lang):
    """Build a UTF-8 encoded USLT (lyrics) ID3 frame.

    Args:
        lyric: The lyric text to store in the frame.
        lang: Language code for the frame; main() passes 'eng' and 'jpn'.

    Returns:
        A ``mutagen.id3.USLT`` frame carrying *lyric*.
    """
    return USLT(encoding=Encoding.UTF8, lang=lang, text=lyric)
def create_tags(title, lyric, album, artist, track):
    """Assemble an ID3 tag set for one track.

    Args:
        title: Track title (stored in a TIT2 frame).
        lyric: A ready-made ID3 frame that is added as-is; main() passes
            the USLT frame returned by ``lyric_info``.
        album: Album name (TALB frame).
        artist: Artist name (TPE1 frame).
        track: Track number as text (TRCK frame).

    Returns:
        A new ``mutagen.id3.ID3`` object containing the five frames.
    """
    tags = ID3()
    tags.add(TIT2(encoding=Encoding.UTF8, text=title))
    tags.add(TALB(encoding=Encoding.UTF8, text=album))
    tags.add(TPE1(encoding=Encoding.UTF8, text=artist))
    tags.add(TRCK(encoding=Encoding.UTF8, text=track))
    tags.add(lyric)
    return tags
def apply_tags(mp3_path, tags):
    """Replace the tags of the MP3 file at *mp3_path* with *tags* and save.

    Args:
        mp3_path: Path of an existing MP3 file.
        tags: The ID3 tag object to assign to the file.
    """
    mp3 = MP3(mp3_path)
    mp3.tags = tags
    mp3.save()
def main():
    """Turn a two-column CSV (English, Japanese) into tagged MP3 pairs.

    For every CSV row the English and Japanese texts are synthesized
    (reusing audio cached under ``.cache``), copied to
    ``out/[NNN]_0.mp3`` (English) and ``out/[NNN]_1.mp3`` (Japanese), and
    tagged with title, album, artist, track number and lyrics.

    Raises:
        ValueError: If a non-blank CSV row does not have exactly two columns.
    """
    cache_path = '.cache'
    out_path = 'out'
    csv_path = 'xxxx.csv'
    album = 'xxxx'
    artist = 'xxxx'

    os.makedirs(cache_path, exist_ok=True)
    os.makedirs(out_path, exist_ok=True)

    def readall():
        # newline='' lets the csv module do its own line-ending handling, so
        # quoted fields containing newlines are parsed correctly.
        with open(csv_path, encoding='utf_8', newline='') as file:
            # csv.reader yields [] for blank lines; drop them instead of
            # failing on the two-column unpack below.
            return [row for row in csv.reader(file) if row]

    rows = readall()
    # Zero-pad indices to at least two digits. len(str(n)) equals
    # int(log10(n)) + 1 for n >= 1 and, unlike log10, is defined for n == 0.
    max_digits = max(2, len(str(len(rows))))

    en_voices = [voice_en_gb, voice_en_us, voice_en_india]
    ja_voices = [voice_ja]

    for i, row in enumerate(rows):
        if len(row) != 2:
            raise ValueError(
                f'{csv_path}: entry {i} has {len(row)} columns, '
                f'expected 2: {row!r}'
            )
        en_str, ja_str = row

        # NOTE: cache file names depend on the text only, not on the
        # language or voice, so identical strings share one cached file.
        en_cache_file = os.path.join(cache_path, to_hash(en_str))
        ja_cache_file = os.path.join(cache_path, to_hash(ja_str))

        # prepare caches
        fill_cache(en_str, en_cache_file, model_candidates=en_voices)
        fill_cache(ja_str, ja_cache_file, model_candidates=ja_voices)

        # write mp3 files
        idx = str(i).zfill(max_digits)
        out_file_0 = os.path.join(out_path, f'[{idx}]_0.mp3')
        out_file_1 = os.path.join(out_path, f'[{idx}]_1.mp3')
        shutil.copy(en_cache_file, out_file_0)
        shutil.copy(ja_cache_file, out_file_1)

        # set tags; track numbers interleave English (odd) and Japanese (even)
        apply_tags(out_file_0, create_tags(en_str, lyric_info(en_str, 'eng'), album, artist, str(i*2+1)))
        apply_tags(out_file_1, create_tags(ja_str, lyric_info(ja_str, 'jpn'), album, artist, str(i*2+2)))

        # coarse progress indicator
        if i % 100 == 0:
            print(i)
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment