tsukumijima/PriconneVoiceExtractor.py

## PriconneVoiceExtractor.py

# import
import os
import sys
import json
import argparse
import requests
from pprint import pprint
from bs4 import BeautifulSoup


def download_file(url, path):
    """Download ``url`` and save the response body to ``path``.

    This is a best-effort helper: request failures, HTTP error statuses and
    file-system errors are printed and swallowed so the caller is never
    interrupted.  Nothing is written when the server answers with an error
    status.

    Args:
        url: Address of the file to fetch.
        path: Destination file path; it is opened in binary write mode.
    """
    try:
        response = requests.get(url)
        # Treat 4xx/5xx answers as failures so that an error page is never
        # saved under the name of the requested file.
        response.raise_for_status()
        with open(path, 'wb') as file:
            file.write(response.content)
    except (requests.RequestException, OSError) as e:
        print(e)


def main():
    """Extract character voices and their captions from the story data.

    Parses the command line, reads the list of story IDs from the voice
    server's directory listing, then fetches each story's JSON script and
    walks its commands:

    * ``bust``  starts a new voice entry,
    * ``vo``    records the voice file name of the current entry,
    * ``print`` records the speaker and appends text to the caption,
    * ``touch`` finalizes the current entry: the voice is downloaded and the
      caption is written next to it, or the entry is discarded when it has
      no voice or belongs to another character.

    A summary of every kept entry is printed at the end.
    """

    # Define and parse the command-line arguments.
    parser = argparse.ArgumentParser(description = 'Tool to extract "Princess Connect! Re:Dive" voice data.', formatter_class = argparse.RawTextHelpFormatter)
    parser.add_argument('CharacterName', help = 'Character name to get (get all by specifying "ALL")')
    parser.add_argument('OutputFolder', help = 'Folder to output')
    parser.add_argument('--max-voice', type = int, help = 'Maximum number of voices to get')
    parser.add_argument('--max-story', type = int, help = 'Maximum number of stories to get')
    # Parsed as integers once here instead of on every loop iteration.
    parser.add_argument('--min-storyid', type = int, help = 'Story ID to start get')
    parser.add_argument('--max-storyid', type = int, help = 'Story ID to end get')
    parser.add_argument('--replace-player', help = 'String that replaces {player} in the caption')
    parser.add_argument('--leave-newline', action='store_true', help = 'Leave a new line in the caption')
    parser.add_argument('--folder-storyid', action='store_true', help = 'Use story ID for subdivided folder names')
    args = parser.parse_args()

    # Kept voice entries; each is a dict with the keys
    # 'story_id', 'character', 'voice' and 'caption'.
    voices = []

    # Base URL of the story scripts (JSON).
    story_url = 'https://redive.estertion.win/story/data/'

    # Base URL of the voice files; its index page lists the story IDs.
    voice_url = 'https://redive.estertion.win/sound/story_vo/'

    # Character to extract ("ALL", in any letter case, extracts everyone).
    character_name = args.CharacterName.rstrip()
    fetch_all = character_name.upper() == 'ALL'

    # Output folder, always with a trailing slash.
    output_folder = args.OutputFolder.rstrip() + '/'

    # Maximum number of voices to get (None means unlimited).
    voice_maxcount = args.max_voice

    # Maximum number of stories to get (None means unlimited).
    story_maxcount = args.max_story

    # First story ID to process (None means no lower bound).
    min_storyid = args.min_storyid

    # Last story ID to process (None means no upper bound).
    max_storyid = args.max_storyid

    # Replacement for {player} in captions (None leaves it untouched).
    replace_player = args.replace_player

    # Keep line breaks in captions (defaults to False).
    leave_newline = args.leave_newline

    # Name the sub-folders after the story ID instead of the character.
    folder_storyid = args.folder_storyid

    print()
    print('Character Name: ' + character_name)
    print('Output Folder: ' + output_folder)
    print()

    # Fetch the list of stories.
    stories_list_html = requests.get(voice_url)
    stories_list = BeautifulSoup(stories_list_html.text, 'html.parser')

    # Process the stories one by one.
    story_count = 0
    for stories_list_link in stories_list.select('pre a'):

        # Story ID (the link text without the trailing slash).
        story_id = stories_list_link.get_text(strip = True).replace('/', '')

        # Skip the parent-directory link.
        if story_id == '..':
            continue

        # Skip until the first requested story ID is reached.
        if min_storyid is not None and int(story_id) < min_storyid:
            continue

        # Stop once the last requested story ID has been passed.
        if max_storyid is not None and int(story_id) > max_storyid:
            print()
            print('Notice: 取得を終了する Story ID を超えました。取得を終了します。', end = '\n\n')
            break

        # Count this story.
        story_count += 1

        # Stop once the story limit is exceeded.  The counter already
        # includes the current story, so the comparison must be strict or
        # the last allowed story would never be processed.
        if story_maxcount is not None and story_count > story_maxcount:
            print()
            print('Notice: ストーリーの最大取得数を超えました。取得を終了します。', end = '\n\n')
            break

        # Stop once enough voices have been collected.
        if voice_maxcount is not None and len(voices) >= voice_maxcount:
            print()
            print('Notice: ボイスの最大取得数を超えました。取得を終了します。', end = '\n\n')
            break

        print('Story ID: ' + story_id)
        print()

        # Fetch the story script; a failed request or a body that is not
        # valid JSON means there is no story for this ID.
        try:
            stories = requests.get(story_url + story_id + '.json').json()
        except (requests.RequestException, ValueError):
            print('Notice: ストーリーが見つかりません。スキップします。', end = '\n\n')
            continue

        # Walk through the commands of the story.
        for story in stories:

            command = story['name']

            if command == 'bust':

                # Start a new, still empty entry.
                voices.append({
                    'story_id': story_id,
                    'character': '',
                    'voice': '',
                    'caption': '',
                })
                continue

            # Every other command acts on the newest entry; without one
            # there is nothing to update.
            if not voices:
                continue

            current = voices[-1]

            if command == 'vo':

                # Only the first voice of an entry is recorded.
                if current['voice'] == '':
                    current['voice'] = story['args'][0] + '.m4a'

            elif command == 'print':

                current['character'] = story['args'][0]

                # Append the new text to the caption collected so far.
                caption = current['caption'] + story['args'][1]

                # Remove half-width spaces.
                caption = caption.replace(' ', '')

                # Replace {player} with the requested string.
                if replace_player is not None:
                    caption = caption.replace('{player}', replace_player)

                # Replace line breaks with a full-width space unless they
                # are to be kept.
                if not leave_newline:
                    caption = caption.replace('\n', '　')

                current['caption'] = caption

            elif command == 'touch':

                # Entries of other characters are dropped silently.
                if not (fetch_all or character_name == current['character']):
                    del voices[-1]
                    continue

                # Entries without a voice are reported and dropped.
                if current['voice'] == '':
                    print('Character: ' + current['character'])
                    print('Caption: ' + current['caption'])
                    print('Notice: ボイスがストーリー内に見つかりません。スキップします。')
                    del voices[-1]
                    continue

                print('Character: ' + current['character'])
                print('Voice: ' + current['voice'])
                print('Caption: ' + current['caption'])

                # Create the sub-folder, named after the story ID or the
                # character.
                subfolder = story_id if folder_storyid else current['character']
                target_folder = os.path.join(output_folder, subfolder)
                os.makedirs(target_folder, exist_ok = True)

                # Save the voice.
                url = voice_url + story_id + '/' + current['voice']
                download_file(url, os.path.join(target_folder, current['voice']))

                # Save the caption under the same base name.  UTF-8 is
                # forced so that the text can always be encoded, whatever
                # the platform's default encoding is.
                caption_name = os.path.splitext(current['voice'])[0] + '.txt'
                with open(os.path.join(target_folder, caption_name), mode = 'w', encoding = 'utf-8') as file:
                    file.write(current['caption'])

                # Leave the story once enough voices have been collected;
                # the story loop then prints the notice and stops.
                if voice_maxcount is not None and len(voices) >= voice_maxcount:
                    break

                print()

    # Summary of the result.
    print()
    print('Extracted Voices: ')
    print('Character: ' + character_name + ' Count: ' + str(len(voices)), end = '\n\n')

    for voice in voices:
        print('Story ID: ' + voice['story_id'])
        print('Voice: ' + voice['voice'])
        print('Caption: ' + voice['caption'])
        print()

    print('Extracted. Finish.')
    print()


# Script entry point: run the extractor only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()

	# import
	import os
	import sys
	import json
	import argparse
	import requests
	from pprint import pprint
	from bs4 import BeautifulSoup


	def download_file(url, path):
	    """Download ``url`` and save the response body to ``path``.

	    This is a best-effort helper: request failures, HTTP error statuses and
	    file-system errors are printed and swallowed so the caller is never
	    interrupted.  Nothing is written when the server answers with an error
	    status.

	    Args:
	        url: Address of the file to fetch.
	        path: Destination file path; it is opened in binary write mode.
	    """
	    try:
	        response = requests.get(url)
	        # Treat 4xx/5xx answers as failures so that an error page is never
	        # saved under the name of the requested file.
	        response.raise_for_status()
	        with open(path, 'wb') as file:
	            file.write(response.content)
	    except (requests.RequestException, OSError) as e:
	        print(e)


	def main():
	    """Extract character voices and their captions from the story data.

	    Parses the command line, reads the list of story IDs from the voice
	    server's directory listing, then fetches each story's JSON script and
	    walks its commands:

	    * ``bust``  starts a new voice entry,
	    * ``vo``    records the voice file name of the current entry,
	    * ``print`` records the speaker and appends text to the caption,
	    * ``touch`` finalizes the current entry: the voice is downloaded and the
	      caption is written next to it, or the entry is discarded when it has
	      no voice or belongs to another character.

	    A summary of every kept entry is printed at the end.
	    """

	    # Define and parse the command-line arguments.
	    parser = argparse.ArgumentParser(description = 'Tool to extract "Princess Connect! Re:Dive" voice data.', formatter_class = argparse.RawTextHelpFormatter)
	    parser.add_argument('CharacterName', help = 'Character name to get (get all by specifying "ALL")')
	    parser.add_argument('OutputFolder', help = 'Folder to output')
	    parser.add_argument('--max-voice', type = int, help = 'Maximum number of voices to get')
	    parser.add_argument('--max-story', type = int, help = 'Maximum number of stories to get')
	    # Parsed as integers once here instead of on every loop iteration.
	    parser.add_argument('--min-storyid', type = int, help = 'Story ID to start get')
	    parser.add_argument('--max-storyid', type = int, help = 'Story ID to end get')
	    parser.add_argument('--replace-player', help = 'String that replaces {player} in the caption')
	    parser.add_argument('--leave-newline', action='store_true', help = 'Leave a new line in the caption')
	    parser.add_argument('--folder-storyid', action='store_true', help = 'Use story ID for subdivided folder names')
	    args = parser.parse_args()

	    # Kept voice entries; each is a dict with the keys
	    # 'story_id', 'character', 'voice' and 'caption'.
	    voices = []

	    # Base URL of the story scripts (JSON).
	    story_url = 'https://redive.estertion.win/story/data/'

	    # Base URL of the voice files; its index page lists the story IDs.
	    voice_url = 'https://redive.estertion.win/sound/story_vo/'

	    # Character to extract ("ALL", in any letter case, extracts everyone).
	    character_name = args.CharacterName.rstrip()
	    fetch_all = character_name.upper() == 'ALL'

	    # Output folder, always with a trailing slash.
	    output_folder = args.OutputFolder.rstrip() + '/'

	    # Maximum number of voices to get (None means unlimited).
	    voice_maxcount = args.max_voice

	    # Maximum number of stories to get (None means unlimited).
	    story_maxcount = args.max_story

	    # First story ID to process (None means no lower bound).
	    min_storyid = args.min_storyid

	    # Last story ID to process (None means no upper bound).
	    max_storyid = args.max_storyid

	    # Replacement for {player} in captions (None leaves it untouched).
	    replace_player = args.replace_player

	    # Keep line breaks in captions (defaults to False).
	    leave_newline = args.leave_newline

	    # Name the sub-folders after the story ID instead of the character.
	    folder_storyid = args.folder_storyid

	    print()
	    print('Character Name: ' + character_name)
	    print('Output Folder: ' + output_folder)
	    print()

	    # Fetch the list of stories.
	    stories_list_html = requests.get(voice_url)
	    stories_list = BeautifulSoup(stories_list_html.text, 'html.parser')

	    # Process the stories one by one.
	    story_count = 0
	    for stories_list_link in stories_list.select('pre a'):

	        # Story ID (the link text without the trailing slash).
	        story_id = stories_list_link.get_text(strip = True).replace('/', '')

	        # Skip the parent-directory link.
	        if story_id == '..':
	            continue

	        # Skip until the first requested story ID is reached.
	        if min_storyid is not None and int(story_id) < min_storyid:
	            continue

	        # Stop once the last requested story ID has been passed.
	        if max_storyid is not None and int(story_id) > max_storyid:
	            print()
	            print('Notice: 取得を終了する Story ID を超えました。取得を終了します。', end = '\n\n')
	            break

	        # Count this story.
	        story_count += 1

	        # Stop once the story limit is exceeded.  The counter already
	        # includes the current story, so the comparison must be strict or
	        # the last allowed story would never be processed.
	        if story_maxcount is not None and story_count > story_maxcount:
	            print()
	            print('Notice: ストーリーの最大取得数を超えました。取得を終了します。', end = '\n\n')
	            break

	        # Stop once enough voices have been collected.
	        if voice_maxcount is not None and len(voices) >= voice_maxcount:
	            print()
	            print('Notice: ボイスの最大取得数を超えました。取得を終了します。', end = '\n\n')
	            break

	        print('Story ID: ' + story_id)
	        print()

	        # Fetch the story script; a failed request or a body that is not
	        # valid JSON means there is no story for this ID.
	        try:
	            stories = requests.get(story_url + story_id + '.json').json()
	        except (requests.RequestException, ValueError):
	            print('Notice: ストーリーが見つかりません。スキップします。', end = '\n\n')
	            continue

	        # Walk through the commands of the story.
	        for story in stories:

	            command = story['name']

	            if command == 'bust':

	                # Start a new, still empty entry.
	                voices.append({
	                    'story_id': story_id,
	                    'character': '',
	                    'voice': '',
	                    'caption': '',
	                })
	                continue

	            # Every other command acts on the newest entry; without one
	            # there is nothing to update.
	            if not voices:
	                continue

	            current = voices[-1]

	            if command == 'vo':

	                # Only the first voice of an entry is recorded.
	                if current['voice'] == '':
	                    current['voice'] = story['args'][0] + '.m4a'

	            elif command == 'print':

	                current['character'] = story['args'][0]

	                # Append the new text to the caption collected so far.
	                caption = current['caption'] + story['args'][1]

	                # Remove half-width spaces.
	                caption = caption.replace(' ', '')

	                # Replace {player} with the requested string.
	                if replace_player is not None:
	                    caption = caption.replace('{player}', replace_player)

	                # Replace line breaks with a full-width space unless they
	                # are to be kept.
	                if not leave_newline:
	                    caption = caption.replace('\n', '　')

	                current['caption'] = caption

	            elif command == 'touch':

	                # Entries of other characters are dropped silently.
	                if not (fetch_all or character_name == current['character']):
	                    del voices[-1]
	                    continue

	                # Entries without a voice are reported and dropped.
	                if current['voice'] == '':
	                    print('Character: ' + current['character'])
	                    print('Caption: ' + current['caption'])
	                    print('Notice: ボイスがストーリー内に見つかりません。スキップします。')
	                    del voices[-1]
	                    continue

	                print('Character: ' + current['character'])
	                print('Voice: ' + current['voice'])
	                print('Caption: ' + current['caption'])

	                # Create the sub-folder, named after the story ID or the
	                # character.
	                subfolder = story_id if folder_storyid else current['character']
	                target_folder = os.path.join(output_folder, subfolder)
	                os.makedirs(target_folder, exist_ok = True)

	                # Save the voice.
	                url = voice_url + story_id + '/' + current['voice']
	                download_file(url, os.path.join(target_folder, current['voice']))

	                # Save the caption under the same base name.  UTF-8 is
	                # forced so that the text can always be encoded, whatever
	                # the platform's default encoding is.
	                caption_name = os.path.splitext(current['voice'])[0] + '.txt'
	                with open(os.path.join(target_folder, caption_name), mode = 'w', encoding = 'utf-8') as file:
	                    file.write(current['caption'])

	                # Leave the story once enough voices have been collected;
	                # the story loop then prints the notice and stops.
	                if voice_maxcount is not None and len(voices) >= voice_maxcount:
	                    break

	                print()

	    # Summary of the result.
	    print()
	    print('Extracted Voices: ')
	    print('Character: ' + character_name + ' Count: ' + str(len(voices)), end = '\n\n')

	    for voice in voices:
	        print('Story ID: ' + voice['story_id'])
	        print('Voice: ' + voice['voice'])
	        print('Caption: ' + voice['caption'])
	        print()

	    print('Extracted. Finish.')
	    print()


	# Script entry point: run the extractor only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == '__main__':
	    main()