serg987/tag2utf-0.17.py

## tag2utf-0.17.py
#!/usr/bin/python
import os.path
import re
import os
import sys
import copy
import eyed3
from eyed3.id3.tag import Tag

# Help text printed when the script is started with --usage, --help or --version.
# NOTE(review): the text says "Version 0.12" while the file header names
# tag2utf-0.17.py - confirm which version number is current.
helptext = """
Originally taken from https://github.com/volkhin/scripts/blob/master/tag2utf-0.16.py

Tool for encoding tags of mp3 files in the russian 1-byte charsets to unicode

Usage: tag2utf [DIRECTORIES]
(By default files will be searched in the current directory)
Modes:
--restore  : program will try to restore tags, that was broken by not right user choice

--help, --version, --usage  - view this text

Version 0.12
Author: Kopats Andrei
	hlamer@tut.by
Bugfix: Yarmak Vladislav
	chayt@smtp.ru
Move to Python 3.9: https://github.com/serg987
    Notes:
        - use latest eyed3 version (0.9.7)
        - set default encoding to utf-8,
        - added album_artist field to be transcoded
        - --restore option was not tested and most likely will not work. Do backups before running this script

This program is distributed under the terms of the GPL License.

TODO:
	undo changes,
	Charsets will be in the config file or command line, for encoding not only from cp1251 and koi8-r
	GUI
If you need to encode tags from different charset using this version, you can modify script, it's very easy to do.
"""

# Source charsets offered to the user, mapped to the one-letter key that is
# typed at the prompt.  Most mp3 files met in 2023+ use cp1251 only, so koi8-r
# is left commented out as an example; with a huge amount of files it is more
# convenient to have a single code page.
# Add entries here if you want to decode tags from other encodings.
charsets = {'cp1251': 'c'}  # , 'koi8-r': 'k'}

# Default mode: just convert tags that hold 1-byte-encoded text now.
# Restore mode (--restore): repair tags that were converted with the wrong
# charset and decode them with the right one.
restore_mode = False

# Matches file names ending in ".mp3", in any letter case.  The dot is
# required so that names which merely end in the letters "mp3" are not
# treated as audio files.
mp3FileName = re.compile(r'.*\.mp3$', re.IGNORECASE)


def recoding_need(bts):
    """Tell whether *bts* contains at least one byte value above 127.

    Such bytes cannot be plain ASCII, so the value has to be decoded with
    one of the 1-byte charsets.  An empty value needs no recoding.
    """
    return any(byte > 127 for byte in bts)


def pass_dir(rootdir):
    """Queue the mp3 files of one directory whose tags need work and call ask_user().

    Only *rootdir* itself is scanned (no recursion).  For every regular file
    whose name matches ``mp3FileName`` the ID3 tag is parsed and the title,
    artist, album and album-artist fields are inspected:

    * normal mode - files with a field that does not fit into latin-1 are
      reported as already converted to unicode and skipped; files whose
      fields are all ASCII are skipped silently; the remaining files are
      queued with their fields as latin-1 encoded bytes (``b''`` for a
      missing field).
    * restore mode - every file with a parsable tag is queued with its
      fields as text ('' for a missing field).  The "already unicode" check
      is not applied here: wrongly converted tags are unicode text by
      definition, so applying it would skip exactly the files to restore.

    Files that cannot be written are reported and skipped.  When at least
    one file was queued, ask_user() is called once with the parallel lists.
    """
    tags = []
    songs = []
    titles = []
    artists = []
    albums = []
    album_artists = []
    for song in os.listdir(rootdir):
        filename = os.path.join(rootdir, song)
        if not (os.path.isfile(filename) and mp3FileName.match(song)):
            continue
        tag = Tag()
        try:
            if not tag.parse(filename):
                continue  # something wrong with this file
        except Exception:
            # Not a bare except, so Ctrl+C still interrupts the run.
            print('\n', filename, ':error, tag may be corrupted.\n ')
            continue
        fields = [tag.title, tag.artist, tag.album, tag.album_artist]
        if restore_mode:
            values = [field or '' for field in fields]
        else:
            try:
                values = [field.encode('latin-1') if field else b'' for field in fields]
            except UnicodeEncodeError:
                # Text outside latin-1 cannot be mis-decoded 1-byte data.
                print(f'Tags of file {filename} were already changed to unicode:\n'
                      f'Title: {tag.title}, artist: {tag.artist}, album: {tag.album}, album artist: {tag.album_artist};'
                      f' skipping...')
                continue
            if not any(field and not field.isascii() for field in fields):
                continue  # pure ASCII tags, nothing to convert
        if not os.access(filename, os.W_OK):
            print('Warning! Have no access for writing the file ', filename, ' Skipped!')
            continue
        title, artist, album, album_artist = values
        tags.append(tag)
        songs.append(song)
        titles.append(title)
        artists.append(artist)
        albums.append(album)
        album_artists.append(album_artist)
    if tags:
        print(len(tags), ' file(s) found in the ', rootdir)
        ask_user(tags, songs, titles, artists, albums, album_artists)


def update_tags(tags, titles, artists, albums, album_artists, charset, wrong_charset=''):
    """Convert the queued field values and save every tag as ID3 v2.4 with utf8 text.

    The list arguments are parallel: entry i of each list belongs to tags[i].

    * normal mode (restore_mode off, or no *wrong_charset* given) - the
      values are bytes; a value is decoded with *charset* when it contains
      bytes above 127 and as utf8 otherwise.
    * restore mode - the values are text; each one is encoded back with
      *wrong_charset* and decoded with *charset*.  A missing (None) value is
      treated as an empty string.
    """
    restoring = restore_mode and wrong_charset != ''

    def convert(value):
        if restoring:
            return (value or '').encode(wrong_charset).decode(charset)
        return value.decode(charset if recoding_need(value) else 'utf8')

    for tag, title, artist, album, album_artist in zip(tags, titles, artists, albums, album_artists):
        tag.version = eyed3.id3.ID3_V2_4
        tag.artist = convert(artist)
        tag.album = convert(album)
        tag.title = convert(title)
        tag.album_artist = convert(album_artist)
        tag.save(encoding='utf8')


def ask_user(tags, songs, titles, artists, albums, album_artists):
    """Preview the queued tags, ask the user what to do and apply the answer.

    The list arguments are parallel: entry i of each list belongs to tags[i].

    Normal mode prints, for every configured charset, each file's fields
    decoded with that charset.  Restore mode prints, for every ordered pair
    of distinct charsets, each file's fields encoded with the wrong charset
    and decoded with the right one.

    Accepted answers:
      * a charset key (normal mode) or two keys, wrong charset first and
        right charset second (restore mode) - update_tags() is called;
      * 's' - skip the file(s);
      * 'm' - ask again separately for every file (more than one file only).
    Any other answer prints 'What?' and the prompt is repeated.
    """
    charset_list_str = ''
    if not restore_mode:  # normal mode
        def preview(raw, charset):
            # errors='replace': a byte that is undefined in the charset must
            # not abort the preview; it is shown as U+FFFD instead.
            return raw.decode(charset if recoding_need(raw) else 'ascii', errors='replace')

        for charset, key in charsets.items():
            print('\n' + '[' + key + ']' + '   If charset of tags is ' + charset + ':')
            for i, song in enumerate(songs):
                outlst = ['File:', song,
                          'Title:', preview(titles[i], charset),
                          'Artist:', preview(artists[i], charset),
                          'Album:', preview(albums[i], charset),
                          'Album artist:', preview(album_artists[i], charset)]
                print(' '.join(outlst), '\n')
    else:  # restore mode
        for wrong_charset, wrong_key in charsets.items():  # charset, to which file was wrongly converted
            for tag_charset, tag_key in charsets.items():  # right charset of tag
                if wrong_charset == tag_charset:
                    continue
                print('\n' + '[' + wrong_key + tag_key + ']',
                      '   If charset is ' + tag_charset + ' (wrongly converted to ' + wrong_charset + ')', ':')
                for i, song in enumerate(songs):
                    try:
                        print(song, ' ',
                              titles[i].encode(wrong_charset).decode(tag_charset), ' ',
                              artists[i].encode(wrong_charset).decode(tag_charset), ' ',
                              albums[i].encode(wrong_charset).decode(tag_charset), ' ',
                              album_artists[i].encode(wrong_charset).decode(tag_charset))
                    except (UnicodeError, AttributeError):
                        # UnicodeError: text does not round-trip through this pair;
                        # AttributeError: a field is missing (None).
                        print("ERROR:Can't encode tags of " + song)
                charset_list_str += ("   '" + wrong_key + tag_key + "' - " + tag_charset
                                     + ' converted to ' + wrong_charset + '\n')

    print('\n', "Select charset:\n", charset_list_str, "'s' - skip this file(s)")
    if len(tags) > 1:
        print("'m' - manual for every file")

    # Reverse lookup: typed key -> charset name (the first charset wins if a key is reused).
    key_to_charset = {}
    for name, key in charsets.items():
        key_to_charset.setdefault(key, name)

    while True:
        # get user choice and update the tags
        choice = input()
        if not restore_mode:  # normal mode
            if choice in key_to_charset:
                update_tags(tags, titles, artists, albums, album_artists, key_to_charset[choice])
                break
        elif len(choice) == 2 and choice[0] in key_to_charset and choice[1] in key_to_charset:
            # restore mode: first key is the wrong charset, second key the right one
            update_tags(tags, titles, artists, albums, album_artists,
                        key_to_charset[choice[1]], key_to_charset[choice[0]])
            break
        if choice == 's':
            return
        if choice == 'm' and len(tags) > 1:
            for i in range(len(tags)):
                ask_user([tags[i]], [songs[i]], [titles[i]], [artists[i]], [albums[i]], [album_artists[i]])
            # Return only after every file was handled, not after the first one.
            return
        # will be executed if no break or return before
        print('What?')

# Machine-specific directory that is scanned in addition to the directories
# given on the command line, provided that it exists.
dir_to_add = 'Z:\\Mp3\\By_title\\Russian'

if __name__ == '__main__':
    argsFailed = False
    rootdirs = []
    # Use the hard-coded directory only where it exists; appending it
    # unconditionally made the fallback to the current directory unreachable.
    if os.path.isdir(dir_to_add):
        rootdirs.append(dir_to_add)
    for arg in sys.argv[1:]:
        if arg in ('--usage', '--help', '--version'):
            print(helptext)
            sys.exit()
        elif arg == '--restore':
            restore_mode = True
        elif os.path.isdir(arg):
            rootdirs.append(arg)
        # this is needed because paths may start in the working dir or in the root dir
        elif os.path.isdir(os.path.join(os.getcwd(), arg)):
            rootdirs.append(os.path.join(os.getcwd(), arg))
        else:
            # Report the offending argument itself (the old code indexed
            # sys.argv with an undefined name and raised NameError here).
            print("Not right argument '", arg, "' It's not a directory.\n Try ", sys.argv[0], " --usage")
            argsFailed = True
    if argsFailed:
        sys.exit(1)  # non-zero status: the command line was rejected
    if not rootdirs:
        rootdirs = [os.getcwd()]
        print('Starting search in the ', os.getcwd())
    for rootdir in rootdirs:
        # pass_dir() handles a single directory, so walk the tree here.
        for root, dirs, files in os.walk(rootdir):
            pass_dir(root)
	#!/usr/bin/python
	import os.path
	import re
	import os
	import sys
	import copy
	import eyed3
	from eyed3.id3.tag import Tag

	# Help text printed when the script is started with --usage, --help or --version.
	# NOTE(review): every line of the text below starts with a tab that is part
	# of the string, and the text says "Version 0.12" while the file header
	# names tag2utf-0.17.py - confirm both are intended.
	helptext = """
	Originally taken from https://github.com/volkhin/scripts/blob/master/tag2utf-0.16.py

	Tool for encoding tags of mp3 files in the russian 1-byte charsets to unicode

	Usage: tag2utf [DIRECTORIES]
	(By default files will be searched in the current directory)
	Modes:
	--restore : program will try to restore tags, that was broken by not right user choice

	--help, --version, --usage - view this text

	Version 0.12
	Author: Kopats Andrei
	hlamer@tut.by
	Bugfix: Yarmak Vladislav
	chayt@smtp.ru
	Move to Python 3.9: https://github.com/serg987
	Notes:
	- use latest eyed3 version (0.9.7)
	- set default encoding to utf-8,
	- added album_artist field to be transcoded
	- --restore option was not tested and most likely will not work. Do backups before running this script

	This program is distributed under the terms of the GPL License.

	TODO:
	undo changes,
	Charsets will be in the config file or command line, for encoding not only from cp1251 and koi8-r
	GUI
	If you need to encode tags from different charset using this version, you can modify script, it's very easy to do.
	"""

	# Source charsets offered to the user, mapped to the one-letter key that is
	# typed at the prompt.  Most mp3 files met in 2023+ use cp1251 only, so koi8-r
	# is left commented out as an example; with a huge amount of files it is more
	# convenient to have a single code page.
	# Add entries here if you want to decode tags from other encodings.
	charsets = {'cp1251': 'c'}  # , 'koi8-r': 'k'}

	# Default mode: just convert tags that hold 1-byte-encoded text now.
	# Restore mode (--restore): repair tags that were converted with the wrong
	# charset and decode them with the right one.
	restore_mode = False

	# Matches file names ending in ".mp3", in any letter case.  The previous
	# pattern contained an escaped pipe ("\|"), which matches a literal "|"
	# character, so ordinary mp3 file names never matched.
	mp3FileName = re.compile(r'.*\.mp3$', re.IGNORECASE)


	def recoding_need(bts):
	    """Tell whether *bts* contains at least one byte value above 127.

	    Such bytes cannot be plain ASCII, so the value has to be decoded with
	    one of the 1-byte charsets.  An empty value needs no recoding.
	    """
	    return any(byte > 127 for byte in bts)


	def pass_dir(rootdir):
	    """Queue the mp3 files of one directory whose tags need work and call ask_user().

	    Only *rootdir* itself is scanned (no recursion).  For every regular file
	    whose name matches ``mp3FileName`` the ID3 tag is parsed and the title,
	    artist, album and album-artist fields are inspected:

	    * normal mode - files with a field that does not fit into latin-1 are
	      reported as already converted to unicode and skipped; files whose
	      fields are all ASCII are skipped silently; the remaining files are
	      queued with their fields as latin-1 encoded bytes (``b''`` for a
	      missing field).
	    * restore mode - every file with a parsable tag is queued with its
	      fields as text ('' for a missing field).  The "already unicode" check
	      is not applied here: wrongly converted tags are unicode text by
	      definition, so applying it would skip exactly the files to restore.

	    Files that cannot be written are reported and skipped.  When at least
	    one file was queued, ask_user() is called once with the parallel lists.
	    """
	    tags = []
	    songs = []
	    titles = []
	    artists = []
	    albums = []
	    album_artists = []
	    for song in os.listdir(rootdir):
	        filename = os.path.join(rootdir, song)
	        if not (os.path.isfile(filename) and mp3FileName.match(song)):
	            continue
	        tag = Tag()
	        try:
	            if not tag.parse(filename):
	                continue  # something wrong with this file
	        except Exception:
	            # Not a bare except, so Ctrl+C still interrupts the run.
	            print('\n', filename, ':error, tag may be corrupted.\n ')
	            continue
	        fields = [tag.title, tag.artist, tag.album, tag.album_artist]
	        if restore_mode:
	            values = [field or '' for field in fields]
	        else:
	            try:
	                values = [field.encode('latin-1') if field else b'' for field in fields]
	            except UnicodeEncodeError:
	                # Text outside latin-1 cannot be mis-decoded 1-byte data.
	                print(f'Tags of file {filename} were already changed to unicode:\n'
	                      f'Title: {tag.title}, artist: {tag.artist}, album: {tag.album}, album artist: {tag.album_artist};'
	                      f' skipping...')
	                continue
	            if not any(field and not field.isascii() for field in fields):
	                continue  # pure ASCII tags, nothing to convert
	        if not os.access(filename, os.W_OK):
	            print('Warning! Have no access for writing the file ', filename, ' Skipped!')
	            continue
	        title, artist, album, album_artist = values
	        tags.append(tag)
	        songs.append(song)
	        titles.append(title)
	        artists.append(artist)
	        albums.append(album)
	        album_artists.append(album_artist)
	    if tags:
	        print(len(tags), ' file(s) found in the ', rootdir)
	        ask_user(tags, songs, titles, artists, albums, album_artists)


	def update_tags(tags, titles, artists, albums, album_artists, charset, wrong_charset=''):
	    """Convert the queued field values and save every tag as ID3 v2.4 with utf8 text.

	    The list arguments are parallel: entry i of each list belongs to tags[i].

	    * normal mode (restore_mode off, or no *wrong_charset* given) - the
	      values are bytes; a value is decoded with *charset* when it contains
	      bytes above 127 and as utf8 otherwise.
	    * restore mode - the values are text; each one is encoded back with
	      *wrong_charset* and decoded with *charset*.  A missing (None) value is
	      treated as an empty string.
	    """
	    restoring = restore_mode and wrong_charset != ''

	    def convert(value):
	        if restoring:
	            return (value or '').encode(wrong_charset).decode(charset)
	        return value.decode(charset if recoding_need(value) else 'utf8')

	    for tag, title, artist, album, album_artist in zip(tags, titles, artists, albums, album_artists):
	        tag.version = eyed3.id3.ID3_V2_4
	        tag.artist = convert(artist)
	        tag.album = convert(album)
	        tag.title = convert(title)
	        tag.album_artist = convert(album_artist)
	        tag.save(encoding='utf8')


	def ask_user(tags, songs, titles, artists, albums, album_artists):
	    """Preview the queued tags, ask the user what to do and apply the answer.

	    The list arguments are parallel: entry i of each list belongs to tags[i].

	    Normal mode prints, for every configured charset, each file's fields
	    decoded with that charset.  Restore mode prints, for every ordered pair
	    of distinct charsets, each file's fields encoded with the wrong charset
	    and decoded with the right one.

	    Accepted answers:
	      * a charset key (normal mode) or two keys, wrong charset first and
	        right charset second (restore mode) - update_tags() is called;
	      * 's' - skip the file(s);
	      * 'm' - ask again separately for every file (more than one file only).
	    Any other answer prints 'What?' and the prompt is repeated.
	    """
	    charset_list_str = ''
	    if not restore_mode:  # normal mode
	        def preview(raw, charset):
	            # errors='replace': a byte that is undefined in the charset must
	            # not abort the preview; it is shown as U+FFFD instead.
	            return raw.decode(charset if recoding_need(raw) else 'ascii', errors='replace')

	        for charset, key in charsets.items():
	            print('\n' + '[' + key + ']' + ' If charset of tags is ' + charset + ':')
	            for i, song in enumerate(songs):
	                outlst = ['File:', song,
	                          'Title:', preview(titles[i], charset),
	                          'Artist:', preview(artists[i], charset),
	                          'Album:', preview(albums[i], charset),
	                          'Album artist:', preview(album_artists[i], charset)]
	                print(' '.join(outlst), '\n')
	    else:  # restore mode
	        for wrong_charset, wrong_key in charsets.items():  # charset, to which file was wrongly converted
	            for tag_charset, tag_key in charsets.items():  # right charset of tag
	                if wrong_charset == tag_charset:
	                    continue
	                print('\n' + '[' + wrong_key + tag_key + ']',
	                      ' If charset is ' + tag_charset + ' (wrongly converted to ' + wrong_charset + ')', ':')
	                for i, song in enumerate(songs):
	                    try:
	                        print(song, ' ',
	                              titles[i].encode(wrong_charset).decode(tag_charset), ' ',
	                              artists[i].encode(wrong_charset).decode(tag_charset), ' ',
	                              albums[i].encode(wrong_charset).decode(tag_charset), ' ',
	                              album_artists[i].encode(wrong_charset).decode(tag_charset))
	                    except (UnicodeError, AttributeError):
	                        # UnicodeError: text does not round-trip through this pair;
	                        # AttributeError: a field is missing (None).
	                        print("ERROR:Can't encode tags of " + song)
	                charset_list_str += (" '" + wrong_key + tag_key + "' - " + tag_charset
	                                     + ' converted to ' + wrong_charset + '\n')

	    print('\n', "Select charset:\n", charset_list_str, "'s' - skip this file(s)")
	    if len(tags) > 1:
	        print("'m' - manual for every file")

	    # Reverse lookup: typed key -> charset name (the first charset wins if a key is reused).
	    key_to_charset = {}
	    for name, key in charsets.items():
	        key_to_charset.setdefault(key, name)

	    while True:
	        # get user choice and update the tags
	        choice = input()
	        if not restore_mode:  # normal mode
	            if choice in key_to_charset:
	                update_tags(tags, titles, artists, albums, album_artists, key_to_charset[choice])
	                break
	        elif len(choice) == 2 and choice[0] in key_to_charset and choice[1] in key_to_charset:
	            # restore mode: first key is the wrong charset, second key the right one
	            update_tags(tags, titles, artists, albums, album_artists,
	                        key_to_charset[choice[1]], key_to_charset[choice[0]])
	            break
	        if choice == 's':
	            return
	        if choice == 'm' and len(tags) > 1:
	            for i in range(len(tags)):
	                ask_user([tags[i]], [songs[i]], [titles[i]], [artists[i]], [albums[i]], [album_artists[i]])
	            # Return only after every file was handled, not after the first one.
	            return
	        # will be executed if no break or return before
	        print('What?')

	# Machine-specific directory that is scanned in addition to the directories
	# given on the command line, provided that it exists.
	dir_to_add = 'Z:\\Mp3\\By_title\\Russian'

	if __name__ == '__main__':
	    argsFailed = False
	    rootdirs = []
	    # Use the hard-coded directory only where it exists; appending it
	    # unconditionally made the fallback to the current directory unreachable.
	    if os.path.isdir(dir_to_add):
	        rootdirs.append(dir_to_add)
	    for arg in sys.argv[1:]:
	        if arg in ('--usage', '--help', '--version'):
	            print(helptext)
	            sys.exit()
	        elif arg == '--restore':
	            restore_mode = True
	        elif os.path.isdir(arg):
	            rootdirs.append(arg)
	        # this is needed because paths may start in the working dir or in the root dir
	        elif os.path.isdir(os.path.join(os.getcwd(), arg)):
	            rootdirs.append(os.path.join(os.getcwd(), arg))
	        else:
	            # Report the offending argument itself (the old code indexed
	            # sys.argv with an undefined name and raised NameError here).
	            print("Not right argument '", arg, "' It's not a directory.\n Try ", sys.argv[0], " --usage")
	            argsFailed = True
	    if argsFailed:
	        sys.exit(1)  # non-zero status: the command line was rejected
	    if not rootdirs:
	        rootdirs = [os.getcwd()]
	        print('Starting search in the ', os.getcwd())
	    for rootdir in rootdirs:
	        # pass_dir() handles a single directory, so walk the tree here.
	        for root, dirs, files in os.walk(rootdir):
	            pass_dir(root)