Skip to content

Instantly share code, notes, and snippets.

@serg987
Created December 31, 2023 02:42
Show Gist options
  • Save serg987/60b18bcb0b2eedb6e21fccffe69be319 to your computer and use it in GitHub Desktop.
Save serg987/60b18bcb0b2eedb6e21fccffe69be319 to your computer and use it in GitHub Desktop.
Tool for encoding tags of mp3 files in the 1-byte charsets to unicode
#!/usr/bin/python
import os.path
import re
import os
import sys
import copy
import eyed3
from eyed3.id3.tag import Tag
# Help/usage text; the command-line handling at the bottom of this script
# prints it when --usage, --help or --version is given.
helptext = """
Originally taken from https://github.com/volkhin/scripts/blob/master/tag2utf-0.16.py
Tool for encoding tags of mp3 files in the russian 1-byte charsets to unicode
Usage: tag2utf [DIRECTORIES]
(By default files will be searched in the current directory)
Modes:
--restore : program will try to restore tags, that was broken by not right user choice
--help, --version, --usage - view this text
Version 0.12
Author: Kopats Andrei
hlamer@tut.by
Bugfix: Yarmak Vladislav
chayt@smtp.ru
Move to Python 3.9: https://github.com/serg987
Notes:
- use latest eyed3 version (0.9.7)
- set default encoding to utf-8,
- added album_artist field to be transcoded
- --restore option was not tested and most likely will not work. Do backups before running this script
This program is distributed under the terms of the GPL License.
TODO:
undo changes,
Charsets will be in the config file or command line, for encoding not only from cp1251 and koi8-r
GUI
If you need to encode tags from different charset using this version, you can modify script, it's very easy to do.
"""
# Candidate source charsets, mapped to the one-letter key the user types to
# pick them. Most likely in 2023+ you only have mp3 files with the cp1251 code
# page; when working with a huge amount of files it is more convenient to offer
# a single code page. To decode tags from other encodings add entries here,
# for example: 'koi8-r': 'k'
charsets = {'cp1251': 'c'}

# Default mode - just convert tags that have a 1-byte encoding now.
# Restore (backup) mode - repair tags that were converted with the wrong
# charset (and optionally decode them to the right one).
restore_mode = False

# File names treated as mp3: a real ".mp3" extension in any letter case.
# (The previous pattern accepted any name that merely ended in "mp3" and
# rejected mixed-case extensions such as ".Mp3".)
mp3FileName = re.compile(r'.*\.mp3$', re.IGNORECASE)
def recoding_need(bts):
    """Tell whether a byte sequence holds at least one value above 127.

    Such a byte cannot be plain ASCII, so the text it belongs to has to be
    decoded with a national 1-byte charset.
    """
    return any(byte > 127 for byte in bts)
def pass_dir(rootdir):
    """Collect the mp3 files of one directory whose tags need converting.

    Every regular file in ``rootdir`` (not recursive) whose name matches
    ``mp3FileName`` is parsed with eyed3. In normal mode a file is queued when
    its title, artist, album or album artist contains non-ASCII characters
    that still fit into latin-1 (i.e. raw 1-byte text); files whose tags
    already hold characters outside latin-1 are reported and skipped. In
    restore mode every writable mp3 file is queued.

    When at least one file was queued, ``ask_user`` is called with the
    collected tags and field values. Returns ``None``.
    """
    tags = []
    songs = []
    titles = []
    artists = []
    albums = []
    album_artists = []
    for song in os.listdir(rootdir):
        filename = os.path.join(rootdir, song)
        if not (os.path.isfile(filename) and mp3FileName.match(song)):
            continue
        tag = Tag()
        try:
            if not tag.parse(filename):
                continue  # something wrong with this file
        except Exception:
            # Was a bare ``except``; Exception keeps the best-effort skip but
            # no longer swallows Ctrl-C / SystemExit.
            print('\n', filename, ':error, tag may be corrupted.\n ')
            continue

        fields = (tag.title, tag.artist, tag.album, tag.album_artist)
        if restore_mode:
            # Restore mode works on the (wrongly) decoded text itself, which
            # normally does NOT fit into latin-1, so the "already unicode"
            # skip below must not apply here. Missing fields become '' so the
            # later encode/decode round trip never sees None.
            values = [text or '' for text in fields]
        else:
            # checking if fields were already coded to unicode:
            try:
                values = [text.encode('latin-1') if text else b'' for text in fields]
            except UnicodeEncodeError:
                print(f'Tags of file {filename} were already changed to unicode:\n'
                      f'Title: {tag.title}, artist: {tag.artist}, album: {tag.album}, album artist: {tag.album_artist};'
                      f' skipping...')
                continue
            if all(text.isascii() for text in fields if text):
                continue  # pure ASCII (or empty) tags: nothing to convert

        if not os.access(filename, os.W_OK):
            print('Warning! Have no access for writing the file ', filename, ' Skipped!')
            continue

        title, artist, album, album_artist = values
        tags.append(tag)
        songs.append(song)
        titles.append(title)
        artists.append(artist)
        albums.append(album)
        album_artists.append(album_artist)

    if tags:
        print(len(tags), ' file(s) found in the ', rootdir)
        ask_user(tags, songs, titles, artists, albums, album_artists)
def update_tags(tags, titles, artists, albums, album_artists, charset, wrong_charset=''):
    """Write the converted field values back to every tag and save it.

    Args:
        tags: eyed3 tag objects, parallel to the four value lists.
        titles, artists, albums, album_artists: field values per tag. In
            normal mode these are ``bytes``; in restore mode they are ``str``.
        charset: the charset the text is really stored in.
        wrong_charset: restore mode only - the charset the text was wrongly
            decoded with earlier. Empty means "normal conversion".

    Each tag is switched to ID3 v2.4 and saved with UTF-8 text encoding.
    A failing encode/decode propagates to the caller; tags processed before
    the failure have already been saved.
    """
    # ``wrong_charset`` is tested first so the plain call never depends on the
    # restore flag; the truth value equals ``restore_mode and wrong_charset``.
    restoring = wrong_charset != '' and restore_mode

    def convert(value):
        if restoring:
            # Undo the wrong decoding, then decode with the right charset.
            # (The original referenced an undefined name ``wrongCharset``
            # here.) A missing field is treated as empty text.
            return (value or '').encode(wrong_charset).decode(charset)
        # Pure-ASCII bytes need no national charset.
        return value.decode(charset if recoding_need(value) else 'utf8')

    for tag, title, artist, album, album_artist in zip(
            tags, titles, artists, albums, album_artists):
        tag.version = eyed3.id3.ID3_V2_4
        tag.artist = convert(artist)
        tag.album = convert(album)
        tag.title = convert(title)
        tag.album_artist = convert(album_artist)
        tag.save(encoding='utf8')
def ask_user(tags, songs, titles, artists, albums, album_artists):
    """Preview the candidate decodings and apply the one the user picks.

    For every configured charset (normal mode) or every pair of
    wrong/right charsets (restore mode) the decoded fields of each file are
    printed. The user then types:

    * a charset key (normal mode) or a two-letter wrong+right key pair
      (restore mode) - ``update_tags`` is called for all given files;
    * ``s`` - skip these files;
    * ``m`` - (only with more than one file) ask again for each file
      separately.

    Any other input prints ``What?`` and asks again. Returns ``None``.
    """
    rows = list(zip(songs, titles, artists, albums, album_artists))
    charset_list_str = ''
    if not restore_mode:  # normal mode
        for charset, key in charsets.items():
            print('\n' + '[' + key + ']' + ' If charset of tags is ' + charset + ':')
            for song, *raw_fields in rows:
                try:
                    title, artist, album, album_artist = [
                        raw.decode(charset if recoding_need(raw) else 'ascii')
                        for raw in raw_fields
                    ]
                except UnicodeDecodeError:
                    # A byte that is undefined in this charset must not abort
                    # the preview of the remaining files.
                    print("ERROR:Can't decode tags of " + song)
                    continue
                outlst = ['File:', song, 'Title:', title, 'Artist:', artist,
                          'Album:', album, 'Album artist:', album_artist]
                print(' '.join(outlst), '\n')
            # List the key in the prompt too; previously the normal-mode
            # prompt showed no selectable charsets at all.
            charset_list_str += " '" + key + "' - " + charset + '\n'
    else:  # backup mode
        for wrong_charset, wrong_key in charsets.items():  # charset, to which file was wrongly converted
            for tag_charset, tag_key in charsets.items():  # right charset of tag
                if wrong_charset == tag_charset:
                    continue
                print('\n' + '[' + wrong_key + tag_key + ']',
                      ' If charset is ' + tag_charset + ' (wrongly converted to ' + wrong_charset + ')', ':')
                for song, *texts in rows:
                    try:
                        title, artist, album, album_artist = [
                            text.encode(wrong_charset).decode(tag_charset)
                            for text in texts
                        ]
                    except Exception:
                        # Was a bare ``except``; still covers codec errors and
                        # missing (None) fields, but not Ctrl-C.
                        print("ERROR:Can't encode tags of " + song)
                    else:
                        print(song, ' ', title, ' ', artist, ' ', album, ' ', album_artist)
                charset_list_str += (" '" + wrong_key + tag_key + "' - " + tag_charset
                                     + ' converted to ' + wrong_charset + '\n')

    print('\n', "Select charset:\n", charset_list_str, "'s' - skip this file(s)")
    if len(tags) > 1:
        print("'m' - manual for every file")

    def charset_for(key):
        # First charset registered under this key.
        return next(name for name, value in charsets.items() if value == key)

    known_keys = list(charsets.values())
    while True:
        # get user choice and update the tags
        choice = input()
        if not restore_mode:  # normal mode
            if choice in known_keys:
                update_tags(tags, titles, artists, albums, album_artists, charset_for(choice))
                break
        elif len(choice) == 2 and choice[0] in known_keys and choice[1] in known_keys:
            # first letter: charset the tag was wrongly converted to,
            # second letter: the real charset of the tag
            update_tags(tags, titles, artists, albums, album_artists,
                        charset_for(choice[1]), charset_for(choice[0]))
            break
        if choice == 's':
            return
        if choice == 'm' and len(tags) > 1:
            for tag, (song, title, artist, album, album_artist) in zip(tags, rows):
                ask_user([tag], [song], [title], [artist], [album], [album_artist])
            return
        # will be executed if no break or return before
        print('What?')
# Extra directory that is scanned in addition to the ones given on the command
# line. It is only used when it actually exists on this machine.
dir_to_add = 'Z:\\Mp3\\By_title\\Russian'
if __name__ == '__main__':
    argsFailed = False
    rootdirs = []
    # Queue the hard-coded directory only if present; appending it
    # unconditionally made the "current directory by default" fallback below
    # unreachable.
    if dir_to_add and os.path.isdir(dir_to_add):
        rootdirs.append(dir_to_add)
    for arg in sys.argv[1:]:
        if arg in ('--usage', '--help', '--version'):
            print(helptext)
            sys.exit()
        elif arg == '--restore':
            restore_mode = True
        elif os.path.isdir(arg):
            # os.path.isdir already resolves relative paths against the
            # working directory, so no separate cwd-joined check is needed.
            rootdirs.append(arg)
        else:
            # Report the offending argument itself (the original indexed
            # sys.argv with an undefined variable and crashed here).
            print("Not right argument '", arg, "' It's not a directory.\n Try ", sys.argv[0], " --usage")
            argsFailed = True
    if argsFailed:
        sys.exit()
    if not rootdirs:
        # Nothing to scan was given: fall back to the working directory.
        rootdirs = [os.getcwd()]
        print('Starting search in the ', os.getcwd())
    for rootdir in rootdirs:
        # pass_dir handles one directory level, so walk the tree here.
        for root, dirs, files in os.walk(rootdir):
            pass_dir(root)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment