rubyu/en_ja_csv_to_mp3.py

## en_ja_csv_to_mp3.py
import os
import csv
import math
import shutil
import random
from hashlib import md5
from google.cloud import texttospeech

# Path of the service-account key file, exported through the
# GOOGLE_APPLICATION_CREDENTIALS environment variable.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'xxxx.json'


# Voice presets handed to get_tts_bytes(). main() offers the three English
# voices as candidates for the English sentence and the single Japanese voice
# for the Japanese sentence.
voice_en_gb = texttospeech.VoiceSelectionParams(
    language_code='en-GB',
    name='en-GB-Wavenet-F',
)

voice_en_us = texttospeech.VoiceSelectionParams(
    language_code='en-US',
    name='en-US-Wavenet-H',
)

voice_en_india = texttospeech.VoiceSelectionParams(
    language_code='en-IN',
    name='en-IN-Wavenet-D',
)

voice_ja = texttospeech.VoiceSelectionParams(
    language_code='ja-JP',
    name='ja-JP-Wavenet-B',
)


# Lazily created client shared by every get_tts_bytes() call in this process.
_tts_client = None


def _get_tts_client():
    """Return the shared TextToSpeechClient, creating it on first use."""
    global _tts_client
    if _tts_client is None:
        _tts_client = texttospeech.TextToSpeechClient()
    return _tts_client


def get_tts_bytes(text, voice):
    """Synthesize *text* with *voice* and return the MP3-encoded audio bytes.

    Args:
        text: The sentence to speak.
        voice: A ``texttospeech.VoiceSelectionParams`` selecting the voice.

    Returns:
        The ``audio_content`` of the synthesis response.

    The client is created once and reused, instead of constructing a new
    client for every sentence as the script processes the CSV row by row.
    """
    response = _get_tts_client().synthesize_speech(
        input=texttospeech.SynthesisInput(text=text),
        voice=voice,
        audio_config=texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3,
        ),
    )
    return response.audio_content


def fill_cache(text, cache_path, model_candidates):
    """Make sure *cache_path* contains synthesized audio for *text*.

    Does nothing when *cache_path* already exists. Otherwise a voice is
    picked at random from *model_candidates*, the audio is fetched through
    get_tts_bytes() and stored at *cache_path*.

    The bytes are written to a temporary sibling file and then moved into
    place with os.replace(), so an interrupted write cannot leave a truncated
    file that later runs would accept as a valid cache entry.

    Args:
        text: The sentence to synthesize.
        cache_path: File path of the cache entry.
        model_candidates: Non-empty sequence of voices to choose from.

    Raises:
        IndexError: If the cache entry is missing and *model_candidates* is
            empty (raised by random.choice).
    """
    if os.path.exists(cache_path):
        return

    voice = random.choice(model_candidates)
    audio = get_tts_bytes(text, voice)

    tmp_path = f'{cache_path}.tmp'
    try:
        with open(tmp_path, 'wb') as out:
            out.write(audio)
        os.replace(tmp_path, cache_path)
    finally:
        # After a successful replace the temporary name is gone; it only
        # still exists when writing or moving failed part-way.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def to_hash(text):
    """Return the hexadecimal MD5 digest of *text* encoded as UTF-8.

    main() uses the digest as the cache file name for a sentence.
    """
    digest = md5()
    digest.update(text.encode('utf-8'))
    return digest.hexdigest()


from mutagen.mp3 import MP3
from mutagen.id3 import ID3, Encoding, TIT2, TALB, TRCK, TPE1, USLT


def lyric_info(lyric, lang):
    """Build a UTF-8 encoded USLT (lyrics) frame holding *lyric*.

    Args:
        lyric: The lyrics text.
        lang: Language code for the frame; main() passes 'eng' or 'jpn'.
    """
    frame_fields = {'encoding': Encoding.UTF8, 'lang': lang, 'text': lyric}
    return USLT(**frame_fields)


def create_tags(title, lyric, album, artist, track):
    """Assemble an ID3 tag set for one output file.

    Args:
        title: Text for the TIT2 (title) frame.
        lyric: A ready-made frame (see lyric_info()) added as-is.
        album: Text for the TALB (album) frame.
        artist: Text for the TPE1 (artist) frame.
        track: Text for the TRCK (track number) frame.

    Returns:
        The populated ``ID3`` object.
    """
    tags = ID3()
    text_frames = (
        (TIT2, title),
        (TALB, album),
        (TPE1, artist),
        (TRCK, track),
    )
    for frame_cls, value in text_frames:
        tags.add(frame_cls(encoding=Encoding.UTF8, text=value))
    tags.add(lyric)
    return tags


def apply_tags(mp3_path, tags):
    """Attach *tags* to the MP3 file at *mp3_path* and save the file."""
    audio = MP3(mp3_path)
    audio.tags = tags
    audio.save()


def _read_rows(csv_path):
    """Read the (English, Japanese) sentence pairs from *csv_path*.

    Blank lines are skipped. A row that does not have exactly two columns
    raises ValueError naming the offending line.
    """
    rows = []
    # newline='' lets the csv module handle line endings itself (needed for
    # quoted fields that contain newlines). 'utf-8-sig' reads plain UTF-8
    # unchanged and strips a leading BOM, which would otherwise become part
    # of the first sentence.
    with open(csv_path, encoding='utf-8-sig', newline='') as file:
        reader = csv.reader(file)
        for row in reader:
            if not row:
                continue
            if len(row) != 2:
                raise ValueError(
                    f'{csv_path}:{reader.line_num}: expected 2 columns '
                    f'(English, Japanese), got {len(row)}'
                )
            rows.append(row)
    return rows


def main():
    """Turn each CSV row into a pair of tagged MP3 files.

    For every (English, Japanese) row the audio of both sentences is
    synthesized (or taken from the on-disk cache keyed by the sentence
    hash), copied to ``out/[NN]_0.mp3`` (English) and ``out/[NN]_1.mp3``
    (Japanese), and tagged with title, lyrics, album, artist and a running
    track number. The row index is printed every 100 rows as progress.
    """
    cache_path = '.cache'
    out_path = 'out'
    csv_path = 'xxxx.csv'
    album = 'xxxx'
    artist = 'xxxx'

    os.makedirs(cache_path, exist_ok=True)
    os.makedirs(out_path, exist_ok=True)

    rows = _read_rows(csv_path)

    # Width of the zero-padded index: the number of decimal digits of the row
    # count, at least 2. Counting digits via str() also works for an empty
    # CSV, where log10(0) would raise.
    max_digits = max(2, len(str(len(rows))))
    en_voices = [voice_en_gb, voice_en_us, voice_en_india]
    ja_voices = [voice_ja]

    for i, (en_str, ja_str) in enumerate(rows):
        en_cache_file = os.path.join(cache_path, to_hash(en_str))
        ja_cache_file = os.path.join(cache_path, to_hash(ja_str))

        # prepare caches
        fill_cache(en_str, en_cache_file, model_candidates=en_voices)
        fill_cache(ja_str, ja_cache_file, model_candidates=ja_voices)

        # write mp3 files
        idx = str(i).zfill(max_digits)
        out_file_0 = os.path.join(out_path, f'[{idx}]_0.mp3')
        out_file_1 = os.path.join(out_path, f'[{idx}]_1.mp3')
        shutil.copy(en_cache_file, out_file_0)
        shutil.copy(ja_cache_file, out_file_1)

        # set tags; track numbers run 1, 2, 3, ... across English/Japanese pairs
        apply_tags(out_file_0, create_tags(en_str, lyric_info(en_str, 'eng'), album, artist, str(i*2+1)))
        apply_tags(out_file_1, create_tags(ja_str, lyric_info(ja_str, 'jpn'), album, artist, str(i*2+2)))

        if i % 100 == 0:
            print(i)


# Run the conversion only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
	import os
	import csv
	import math
	import shutil
	import random
	from hashlib import md5
	from google.cloud import texttospeech

	# Path of the service-account key file, exported through the
	# GOOGLE_APPLICATION_CREDENTIALS environment variable.
	os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'xxxx.json'


	# Voice presets handed to get_tts_bytes(). main() offers the three English
	# voices as candidates for the English sentence and the single Japanese
	# voice for the Japanese sentence.
	voice_en_gb = texttospeech.VoiceSelectionParams(
	    language_code='en-GB',
	    name='en-GB-Wavenet-F',
	)

	voice_en_us = texttospeech.VoiceSelectionParams(
	    language_code='en-US',
	    name='en-US-Wavenet-H',
	)

	voice_en_india = texttospeech.VoiceSelectionParams(
	    language_code='en-IN',
	    name='en-IN-Wavenet-D',
	)

	voice_ja = texttospeech.VoiceSelectionParams(
	    language_code='ja-JP',
	    name='ja-JP-Wavenet-B',
	)


	# Lazily created client shared by every get_tts_bytes() call in this process.
	_tts_client = None


	def _get_tts_client():
	    """Return the shared TextToSpeechClient, creating it on first use."""
	    global _tts_client
	    if _tts_client is None:
	        _tts_client = texttospeech.TextToSpeechClient()
	    return _tts_client


	def get_tts_bytes(text, voice):
	    """Synthesize *text* with *voice* and return the MP3-encoded audio bytes.

	    Args:
	        text: The sentence to speak.
	        voice: A ``texttospeech.VoiceSelectionParams`` selecting the voice.

	    Returns:
	        The ``audio_content`` of the synthesis response.

	    The client is created once and reused, instead of constructing a new
	    client for every sentence as the script processes the CSV row by row.
	    """
	    response = _get_tts_client().synthesize_speech(
	        input=texttospeech.SynthesisInput(text=text),
	        voice=voice,
	        audio_config=texttospeech.AudioConfig(
	            audio_encoding=texttospeech.AudioEncoding.MP3,
	        ),
	    )
	    return response.audio_content


	def fill_cache(text, cache_path, model_candidates):
	    """Make sure *cache_path* contains synthesized audio for *text*.

	    Does nothing when *cache_path* already exists. Otherwise a voice is
	    picked at random from *model_candidates*, the audio is fetched through
	    get_tts_bytes() and stored at *cache_path*.

	    The bytes are written to a temporary sibling file and then moved into
	    place with os.replace(), so an interrupted write cannot leave a
	    truncated file that later runs would accept as a valid cache entry.

	    Args:
	        text: The sentence to synthesize.
	        cache_path: File path of the cache entry.
	        model_candidates: Non-empty sequence of voices to choose from.

	    Raises:
	        IndexError: If the cache entry is missing and *model_candidates*
	            is empty (raised by random.choice).
	    """
	    if os.path.exists(cache_path):
	        return

	    voice = random.choice(model_candidates)
	    audio = get_tts_bytes(text, voice)

	    tmp_path = f'{cache_path}.tmp'
	    try:
	        with open(tmp_path, 'wb') as out:
	            out.write(audio)
	        os.replace(tmp_path, cache_path)
	    finally:
	        # After a successful replace the temporary name is gone; it only
	        # still exists when writing or moving failed part-way.
	        if os.path.exists(tmp_path):
	            os.remove(tmp_path)


	def to_hash(text):
	    """Return the hexadecimal MD5 digest of *text* encoded as UTF-8.

	    main() uses the digest as the cache file name for a sentence.
	    """
	    digest = md5()
	    digest.update(text.encode('utf-8'))
	    return digest.hexdigest()


	from mutagen.mp3 import MP3
	from mutagen.id3 import ID3, Encoding, TIT2, TALB, TRCK, TPE1, USLT


	def lyric_info(lyric, lang):
	    """Build a UTF-8 encoded USLT (lyrics) frame holding *lyric*.

	    Args:
	        lyric: The lyrics text.
	        lang: Language code for the frame; main() passes 'eng' or 'jpn'.
	    """
	    return USLT(encoding=Encoding.UTF8, lang=lang, text=lyric)


	def create_tags(title, lyric, album, artist, track):
	    """Assemble an ID3 tag set for one output file.

	    Args:
	        title: Text for the TIT2 (title) frame.
	        lyric: A ready-made frame (see lyric_info()) added as-is.
	        album: Text for the TALB (album) frame.
	        artist: Text for the TPE1 (artist) frame.
	        track: Text for the TRCK (track number) frame.

	    Returns:
	        The populated ``ID3`` object.
	    """
	    tags = ID3()
	    tags.add(TIT2(encoding=Encoding.UTF8, text=title))
	    tags.add(TALB(encoding=Encoding.UTF8, text=album))
	    tags.add(TPE1(encoding=Encoding.UTF8, text=artist))
	    tags.add(TRCK(encoding=Encoding.UTF8, text=track))
	    tags.add(lyric)
	    return tags


	def apply_tags(mp3_path, tags):
	    """Attach *tags* to the MP3 file at *mp3_path* and save the file."""
	    mp3 = MP3(mp3_path)
	    mp3.tags = tags
	    mp3.save()


	def _read_rows(csv_path):
	    """Read the (English, Japanese) sentence pairs from *csv_path*.

	    Blank lines are skipped. A row that does not have exactly two columns
	    raises ValueError naming the offending line.
	    """
	    rows = []
	    # newline='' lets the csv module handle line endings itself (needed
	    # for quoted fields that contain newlines). 'utf-8-sig' reads plain
	    # UTF-8 unchanged and strips a leading BOM, which would otherwise
	    # become part of the first sentence.
	    with open(csv_path, encoding='utf-8-sig', newline='') as file:
	        reader = csv.reader(file)
	        for row in reader:
	            if not row:
	                continue
	            if len(row) != 2:
	                raise ValueError(
	                    f'{csv_path}:{reader.line_num}: expected 2 columns '
	                    f'(English, Japanese), got {len(row)}'
	                )
	            rows.append(row)
	    return rows


	def main():
	    """Turn each CSV row into a pair of tagged MP3 files.

	    For every (English, Japanese) row the audio of both sentences is
	    synthesized (or taken from the on-disk cache keyed by the sentence
	    hash), copied to ``out/[NN]_0.mp3`` (English) and ``out/[NN]_1.mp3``
	    (Japanese), and tagged with title, lyrics, album, artist and a running
	    track number. The row index is printed every 100 rows as progress.
	    """
	    cache_path = '.cache'
	    out_path = 'out'
	    csv_path = 'xxxx.csv'
	    album = 'xxxx'
	    artist = 'xxxx'

	    os.makedirs(cache_path, exist_ok=True)
	    os.makedirs(out_path, exist_ok=True)

	    rows = _read_rows(csv_path)

	    # Width of the zero-padded index: the number of decimal digits of the
	    # row count, at least 2. Counting digits via str() also works for an
	    # empty CSV, where log10(0) would raise.
	    max_digits = max(2, len(str(len(rows))))
	    en_voices = [voice_en_gb, voice_en_us, voice_en_india]
	    ja_voices = [voice_ja]

	    for i, (en_str, ja_str) in enumerate(rows):
	        en_cache_file = os.path.join(cache_path, to_hash(en_str))
	        ja_cache_file = os.path.join(cache_path, to_hash(ja_str))

	        # prepare caches
	        fill_cache(en_str, en_cache_file, model_candidates=en_voices)
	        fill_cache(ja_str, ja_cache_file, model_candidates=ja_voices)

	        # write mp3 files
	        idx = str(i).zfill(max_digits)
	        out_file_0 = os.path.join(out_path, f'[{idx}]_0.mp3')
	        out_file_1 = os.path.join(out_path, f'[{idx}]_1.mp3')
	        shutil.copy(en_cache_file, out_file_0)
	        shutil.copy(ja_cache_file, out_file_1)

	        # set tags; track numbers run 1, 2, 3, ... across English/Japanese pairs
	        apply_tags(out_file_0, create_tags(en_str, lyric_info(en_str, 'eng'), album, artist, str(i*2+1)))
	        apply_tags(out_file_1, create_tags(ja_str, lyric_info(ja_str, 'jpn'), album, artist, str(i*2+2)))

	        if i % 100 == 0:
	            print(i)


	# Run the conversion only when this file is executed as a script, not on import.
	if __name__ == '__main__':
	    main()