Created
December 31, 2023 02:42
-
-
Save serg987/60b18bcb0b2eedb6e21fccffe69be319 to your computer and use it in GitHub Desktop.
Tool for encoding tags of mp3 files in the 1-byte charsets to unicode
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
import copy
import os
import os.path
import re
import sys

import eyed3
from eyed3.id3.tag import Tag
# Usage text printed verbatim for --help, --usage and --version.
helptext = """
Originally taken from https://github.com/volkhin/scripts/blob/master/tag2utf-0.16.py
Tool for encoding tags of mp3 files in the russian 1-byte charsets to unicode
Usage: tag2utf [DIRECTORIES]
(By default files will be searched in the current directory)
Modes:
--restore : program will try to restore tags, that was broken by not right user choice
--help, --version, --usage - view this text
Version 0.12
Author: Kopats Andrei
hlamer@tut.by
Bugfix: Yarmak Vladislav
chayt@smtp.ru
Move to Python 3.9: https://github.com/serg987
Notes:
- use latest eyed3 version (0.9.7)
- set default encoding to utf-8,
- added album_artist field to be transcoded
- --restore option was not tested and most likely will not work. Do backups before running this script
This program is distributed under the terms of the GPL License.
TODO:
undo changes,
Charsets will be in the config file or command line, for encoding not only from cp1251 and koi8-r
GUI
If you need to encode tags from different charset using this version, you can modify script, it's very easy to do.
"""
# Candidate source charsets, mapped to the one-letter answer the user types to
# pick them. Most likely in 2023+ you only have mp3 files with the cp1251 code
# page, and with a huge number of files a single choice is more convenient.
# Modify this if you want to decode tags from other encodings, e.g. add
# 'koi8-r': 'k'.
charsets = {'cp1251': 'c'}

# Default (normal) mode just converts tags that currently hold 1-byte encoded
# text. Restore mode is used to repair tags that were converted with the wrong
# charset (and optionally decode them with the right one).
restore_mode = False

# File names ending in ".mp3", in any letter case. The dot is required so that
# names which merely end with the letters "mp3" are not treated as mp3 files.
mp3FileName = re.compile(r'.*\.mp3$', re.IGNORECASE)
def recoding_need(bts):
    """Return True if *bts* contains at least one byte value above 127.

    Such bytes are outside ASCII, so the text has to be decoded with one of
    the 1-byte charsets rather than taken as-is.

    *bts* is iterated as integers (i.e. a ``bytes``-like object).
    """
    # any() stops at the first high byte instead of building a full list.
    return any(byte > 127 for byte in bts)
def _fits_latin1(text):
    """Return True if *text* can be encoded as latin-1 (every char < 256)."""
    try:
        text.encode('latin-1')
    except UnicodeEncodeError:
        return False
    return True


def pass_dir(rootdir):
    """Collect the mp3 files directly inside *rootdir* whose tags need work.

    For every regular file matching ``mp3FileName`` the ID3 tag is parsed and
    its title, artist, album and album artist are inspected:

    * normal mode: a file is collected when at least one field is non-ASCII
      but all fields still fit into latin-1 (i.e. they look like 1-byte
      charset bytes read as latin-1). The fields are stored as the raw bytes
      obtained by encoding them back to latin-1 (``b''`` for empty fields).
      Files whose fields do not fit latin-1 are reported as already unicode
      and skipped.
    * restore mode: every parsable, writable file is collected and the fields
      are stored as ``str`` ('' for empty fields).

    If anything was collected, the batch is passed to ``ask_user``.
    Sub-directories are not descended into; the caller walks the tree.
    """
    tags = []
    songs = []
    titles = []
    artists = []
    albums = []
    album_artists = []
    for song in os.listdir(rootdir):
        filename = os.path.join(rootdir, song)
        if not (os.path.isfile(filename) and mp3FileName.match(song)):
            continue
        tag = Tag()
        try:
            if not tag.parse(filename):
                continue  # something wrong with this file
        except Exception:
            # Not a bare except: Ctrl-C / SystemExit must still stop the run.
            print('\n', filename, ':error, tag may be corrupted.\n ')
            continue
        fields = (tag.title, tag.artist, tag.album, tag.album_artist)

        # Text that does not fit latin-1 was already stored as real unicode.
        # In restore mode such tags are exactly the ones to repair, so the
        # skip only applies to normal mode.
        already_unicode = any(not _fits_latin1(value) for value in fields if value)
        if already_unicode and not restore_mode:
            print(f'Tags of file {filename} were already changed to unicode:\n'
                  f'Title: {tag.title}, artist: {tag.artist}, album: {tag.album}, album artist: {tag.album_artist};'
                  f' skipping...')
            continue

        has_non_ascii = any(value and not value.isascii() for value in fields)
        if not (has_non_ascii or restore_mode):
            continue
        if not os.access(filename, os.W_OK):
            print('Warning! Have no access for writing the file ', filename, ' Skipped!')
            continue

        if restore_mode:
            # Keep text as str; missing fields become '' so the later
            # .encode() calls do not fail on None.
            title, artist, album, album_artist = (value or '' for value in fields)
        else:
            # Recover the original 1-byte data: latin-1 maps chars 0-255
            # one-to-one onto bytes.
            title, artist, album, album_artist = (
                value.encode('latin-1') if value else b'' for value in fields)
        tags.append(tag)
        songs.append(song)
        titles.append(title)
        artists.append(artist)
        albums.append(album)
        album_artists.append(album_artist)

    if tags:
        print(len(tags), ' file(s) found in the ', rootdir)
        ask_user(tags, songs, titles, artists, albums, album_artists)
def update_tags(tags, titles, artists, albums, album_artists, charset, wrong_charset=''):
    """Write the re-decoded artist, album, title and album artist into each tag.

    The lists are parallel: element *i* of every list belongs to ``tags[i]``.
    Each tag is switched to ID3 v2.4 and saved with utf8 encoding.

    * normal mode (``wrong_charset`` empty or restore mode off): the fields
      are bytes; a field containing bytes above 127 is decoded with
      *charset*, otherwise as utf8.
    * restore mode with a *wrong_charset*: the fields are str that were
      decoded with the wrong charset; they are encoded back with
      *wrong_charset* and decoded again with *charset*.
    """
    # Both operands are side-effect free, so their order is irrelevant.
    restoring = wrong_charset != '' and restore_mode
    for tag, title, artist, album, album_artist in zip(
            tags, titles, artists, albums, album_artists):
        tag.version = eyed3.id3.ID3_V2_4
        if restoring:
            # The original referenced an undefined name "wrongCharset" here,
            # which raised NameError as soon as restore mode was used.
            tag.artist = artist.encode(wrong_charset).decode(charset)
            tag.album = album.encode(wrong_charset).decode(charset)
            tag.title = title.encode(wrong_charset).decode(charset)
            tag.album_artist = album_artist.encode(wrong_charset).decode(charset)
        else:
            tag.artist = artist.decode(charset if recoding_need(artist) else 'utf8')
            tag.album = album.decode(charset if recoding_need(album) else 'utf8')
            tag.title = title.decode(charset if recoding_need(title) else 'utf8')
            tag.album_artist = album_artist.decode(
                charset if recoding_need(album_artist) else 'utf8')
        tag.save(encoding='utf8')
def ask_user(tags, songs, titles, artists, albums, album_artists):
    """Preview the tags per candidate charset and apply the user's choice.

    The lists are parallel (one entry per file). A preview of every file is
    printed for each charset in ``charsets`` (normal mode) or for each
    ordered pair of distinct charsets (restore mode). The answer is then
    read from stdin until it is one of:

    * a charset key (normal mode) or two charset keys "wrong" + "right"
      (restore mode): the tags are rewritten through ``update_tags``;
    * ``s``: skip these files;
    * ``m`` (only offered for more than one file): ask again for every file
      separately.

    Anything else prints ``What?`` and asks again.
    """
    # Reverse lookup: typed key -> charset name. setdefault keeps the first
    # charset if two share a key, which is what list.index() used to pick.
    key_to_charset = {}
    for name, key in charsets.items():
        key_to_charset.setdefault(key, name)

    def preview(raw, charset):
        # Pure-ASCII bytes are shown as-is; only high bytes need the charset.
        return raw.decode(charset if recoding_need(raw) else 'ascii')

    charset_list_str = ''
    if not restore_mode:  # normal mode
        for charset, key in charsets.items():
            print('\n' + '[' + key + ']' + ' If charset of tags is ' + charset + ':')
            for song, title, artist, album, album_artist in zip(
                    songs, titles, artists, albums, album_artists):
                outlst = ['File:', song,
                          'Title:', preview(title, charset),
                          'Artist:', preview(artist, charset),
                          'Album:', preview(album, charset),
                          'Album artist:', preview(album_artist, charset)]
                print(' '.join(outlst), '\n')
    else:  # restore mode
        for wrong_charset, wrong_key in charsets.items():  # charset the file was wrongly converted with
            for tag_charset, tag_key in charsets.items():  # right charset of the tag
                if wrong_charset == tag_charset:
                    continue
                print('\n' + '[' + wrong_key + tag_key + ']',
                      ' If charset is ' + tag_charset + ' (wrongly converted to ' + wrong_charset + ')', ':')
                for song, title, artist, album, album_artist in zip(
                        songs, titles, artists, albums, album_artists):
                    try:
                        print(song, ' ',
                              title.encode(wrong_charset).decode(tag_charset), ' ',
                              artist.encode(wrong_charset).decode(tag_charset), ' ',
                              album.encode(wrong_charset).decode(tag_charset), ' ',
                              album_artist.encode(wrong_charset).decode(tag_charset))
                    except (UnicodeError, AttributeError):
                        # UnicodeError: text does not round-trip through this
                        # pair; AttributeError: a field is None. A bare except
                        # would also have swallowed Ctrl-C.
                        print("ERROR:Can't encode tags of " + song)
                charset_list_str += (" '" + wrong_key + tag_key + "' - " + tag_charset
                                     + ' converted to ' + wrong_charset + '\n')

    print('\n', "Select charset:\n", charset_list_str, "'s' - skip this file(s)")
    if len(tags) > 1:
        print("'m' - manual for every file")

    while True:
        # get user choice and update the tags
        choice = input()
        if not restore_mode:  # normal mode
            if choice in key_to_charset:
                update_tags(tags, titles, artists, albums, album_artists,
                            key_to_charset[choice])
                break
        elif (len(choice) == 2
              and choice[0] in key_to_charset
              and choice[1] in key_to_charset):
            # First letter: charset it was wrongly converted with;
            # second letter: the real charset of the tag.
            update_tags(tags, titles, artists, albums, album_artists,
                        key_to_charset[choice[1]], key_to_charset[choice[0]])
            break
        if choice == 's':
            return
        if choice == 'm' and len(tags) > 1:
            for tag, song, title, artist, album, album_artist in zip(
                    tags, songs, titles, artists, albums, album_artists):
                ask_user([tag], [song], [title], [artist], [album], [album_artist])
            return
        # reached only when the answer was not understood
        print('What?')
# Extra directory that is scanned in addition to the ones given on the
# command line (hard-coded by the script's maintainer).
dir_to_add = 'Z:\\Mp3\\By_title\\Russian'

if __name__ == '__main__':
    argsFailed = False
    rootdirs = []
    # Queue the hard-coded directory only when it exists; otherwise it would
    # always occupy the list and the documented "search the current
    # directory by default" fallback below could never trigger.
    if os.path.isdir(dir_to_add):
        rootdirs.append(dir_to_add)
    for arg in sys.argv[1:]:
        if arg in ('--usage', '--help', '--version'):
            print(helptext)
            sys.exit()
        elif arg == '--restore':
            restore_mode = True
        elif os.path.isdir(arg):
            rootdirs.append(arg)
        # this need because paths may have start in the working dir or in the root dir
        elif os.path.isdir(os.path.join(os.getcwd(), arg)):
            rootdirs.append(os.path.join(os.getcwd(), arg))
        else:
            # Report the offending argument itself; the original indexed
            # sys.argv with an undefined name and died with NameError.
            print("Not right argument '", arg, "' It's not a directory.\n Try ", sys.argv[0], " --usage")
            argsFailed = True
    if argsFailed:
        # Non-zero status so callers can tell the run was rejected.
        sys.exit(1)
    if not rootdirs:
        rootdirs = [os.getcwd()]
        print('Starting search in the ', os.getcwd())
    for rootdir in rootdirs:
        # pass_dir handles one directory level; os.walk supplies every level.
        for root, _dirs, _files in os.walk(rootdir):
            pass_dir(root)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment