Created
November 16, 2022 12:21
-
-
Save leporel/43ad6a1cf0581177d4900aa1822e7b0b to your computer and use it in GitHub Desktop.
Python3 mp3 cyrillic tags to utf8
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import os.path | |
import re | |
import os | |
import sys | |
import copy | |
# Text printed for --help, --usage and --version.
helptext = """
Tool for encoding tags of mp3 files in the russian 1-byte charsets to unicode
You need to install python-eyed3 package.
pip install eyed3
Note that on Windows, you also need to install the libmagic binaries.
pip install python-magic-bin
Usage: tag2utf [DIRECTORIES]
(By default files will be searched in the current dirrectory)
Modes:
--restore : programm will try to restore tags, that was broken by not right user choise
--help, --version, --usage - view this text
Version 0.16
Author: Kopats Andrei
hlamer@tut.by
Bugfix: Yarmak Vladislav
chayt@smtp.ru
Modified: leporel
github.com/leporel
This program is distributed under the terms of the GPL License.
TODO:
undo changes,
Charsets will be in the config file or command line, for encoding not only from cp1251 and koi8-r
GUI
If you need to encode tags from different charset using this version, you can modify script, it's very easy to do.
"""

# Candidate source charsets, mapped to the one-letter key the user types to
# pick them. Modify it if you want to decode tags from other encodings.
charsets = {'cp1251':'c','koi8-r':'k' }

# Default mode: just convert tags that currently hold 1-byte encoded text.
# Restore mode (--restore): repair tags that were converted with the wrong
# charset, optionally decoding them to the right one.
restoreMode = False

# Running totals reported at the end of the run.
scanned = 0
updated = 0

try:
    import eyed3
    from eyed3 import id3
except ImportError:
    # Only a missing package should produce this hint; any other failure
    # (including Ctrl-C) must surface normally.
    print('You need to install python-eyed3 package.')
    sys.exit()

# File names ending in ".mp3", in any letter case. The dot is required so
# that names which merely end in the letters "mp3" are not picked up.
mp3FileName = re.compile(r'.*\.mp3$', re.IGNORECASE)
def recodingNeed(strs):
    """Tell whether any character has a code point between 128 and 255.

    Such characters are neither ASCII nor beyond the 8-bit range, which is
    how text from a 1-byte charset looks when it was stored undecoded.

    Args:
        strs: an iterable of strings, or a single string (callers in this
            file use both forms).

    Returns:
        True if at least one character satisfies 127 < ord(ch) < 256,
        otherwise False (also for empty input).
    """
    if isinstance(strs, str):
        # A bare string is treated as a one-element collection.
        strs = [strs]
    return any(127 < ord(ch) < 256 for text in strs for ch in text)
def passDir(rootdir):
    """Collect the MP3 files directly inside ``rootdir`` that need attention.

    Every regular file whose name matches ``mp3FileName`` is parsed with
    ``id3.Tag``. Files that parse are counted in the global ``scanned``.
    A file is selected when its title/artist/album need recoding, or
    unconditionally in restore mode; unwritable files are skipped with a
    warning. If anything was selected, the batch is handed to ``askUser``.

    Args:
        rootdir: directory to scan (not recursive).
    """
    global scanned
    tags = []
    songs = []
    titles = []
    artists = []
    albums = []
    for song in os.listdir(rootdir):
        filename = os.path.join(rootdir, song)
        if not (os.path.isfile(filename) and mp3FileName.match(song)):
            continue
        tag = id3.Tag()
        try:
            if not tag.parse(filename):
                continue  # parse() returned a falsy value: nothing usable here
        except Exception:
            # Any parsing failure is reported and the file skipped, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print('\n',filename,':error, may be tag is corrupted.\n ')
            continue
        scanned += 1
        title = getTitle(tag)
        artist = getArtist(tag)
        album = getAlbum(tag)
        if not (restoreMode or recodingNeed([title, artist, album])):
            continue
        if not os.access(filename, os.W_OK):
            print('Warning! Have not access for writing file ',filename , ' Sciped!')
            continue
        tags.append(tag)
        songs.append(song)
        if not restoreMode:  # normal mode
            titles.append(getTagStr(title))
            artists.append(getTagStr(artist))
            albums.append(getTagStr(album))
        else:  # restore mode keeps the text exactly as read
            titles.append(title)
            artists.append(artist)
            albums.append(album)
    if tags:
        print(len(tags),' file(s) finded in the ',rootdir)
        askUser(tags, songs, titles, artists, albums)
def getTitle(tagObject):
    """Return what ``tagObject._getTitle()`` yields, or '' when that is None."""
    title = tagObject._getTitle()
    return '' if title is None else title
def getArtist(tagObject):
    """Return what ``tagObject._getArtist()`` yields, or '' when that is None."""
    artist = tagObject._getArtist()
    return '' if artist is None else artist
def getAlbum(tagObject):
    """Return what ``tagObject._getAlbum()`` yields, or '' when that is None."""
    album = tagObject._getAlbum()
    return '' if album is None else album
def getTagStr(tagUnicStr):
    """Return the tag text rebuilt character by character.

    The original code mapped every character below 256 through
    ``chr(ord(c))`` and copied the others unchanged. In Python 3 that
    mapping is the identity, so the result always equals the input text;
    no byte string is produced. Callers that need the raw 8-bit data must
    encode the returned text themselves (e.g. as Latin-1).

    Args:
        tagUnicStr: text as returned by the tag getters.

    Returns:
        A ``str`` with the same characters as ``tagUnicStr``.
    """
    return ''.join(tagUnicStr)
def updateTags(tags, titles, artists, albums, charset, wrongCharset=''):
    """Rewrite artist, album and title of every tag and save it as UTF-8.

    Normal mode (restore mode off, or no ``wrongCharset`` given): a field
    is recoded only when ``recodingNeed`` reports 8-bit characters in it.
    Those characters are treated as raw tag bytes (the text is assumed to
    have been read as Latin-1), so the field is encoded back to Latin-1 and
    decoded with ``charset``. ``str`` has no ``decode`` method in Python 3,
    which is why the explicit round trip is required.

    Restore mode with ``wrongCharset``: every field is encoded with
    ``wrongCharset`` and decoded with ``charset``.

    Each tag is saved as ID3 v2.4 with UTF-8 text and the global
    ``updated`` counter is incremented per saved tag.

    Args:
        tags: tag objects to modify.
        titles, artists, albums: field texts, parallel to ``tags``.
        charset: the charset the tag text really is in.
        wrongCharset: charset the text was wrongly converted with
            (restore mode only).

    Raises:
        UnicodeError: if a field cannot be converted with the chosen
            charsets (except for the mixed-text case handled below).
    """
    global updated
    restoring = restoreMode and wrongCharset != ''

    def recode(text):
        # Convert one field according to the active mode.
        if restoring:
            return text.encode(wrongCharset).decode(charset)
        if not recodingNeed(text):
            return text
        try:
            return text.encode('latin-1').decode(charset)
        except UnicodeEncodeError:
            # The field also holds characters above U+00FF, so it cannot be
            # a run of raw bytes; leave it untouched.
            return text

    for tag, title, artist, album in zip(tags, titles, artists, albums):
        tag._setArtist(recode(artist))
        tag._setAlbum(recode(album))
        tag._setTitle(recode(title))
        tag.save(version=id3.ID3_V2_4, encoding='utf-8')
        updated += 1
def askUser(tags, songs, titles, artists, albums):
    """Preview the candidate conversions and apply the one the user picks.

    Normal mode: for every charset in ``charsets`` the fields of each song
    are shown as they would look when decoded with that charset. The user
    answers with the charset's one-letter key.

    Restore mode: for every ordered pair of different charsets the fields
    are shown re-encoded with the "wrong" charset and decoded with the
    right one. The user answers with the two keys (wrong one first).

    In both modes 's' skips the batch and, when more than one file is
    listed, 'm' asks again for each file separately. Any other answer
    prints 'What?' and prompts again. A valid choice is passed on to
    ``updateTags``.

    Args:
        tags: tag objects of the selected files.
        songs: file names, parallel to ``tags``.
        titles, artists, albums: field texts, parallel to ``tags``.
    """
    def preview(text, charset):
        # Show a field as it would look if its 8-bit characters were bytes
        # of `charset`. `str` has no decode() in Python 3, hence the
        # Latin-1 round trip.
        if not recodingNeed(text):
            return text
        try:
            return text.encode('latin-1').decode(charset)
        except UnicodeEncodeError:
            # Also contains characters above U+00FF: not raw bytes.
            return text

    # Reverse lookup: typed key -> charset name (first charset wins if
    # two share a key, as list.index() did before).
    charsetByKey = {}
    for name, key in charsets.items():
        charsetByKey.setdefault(key, name)

    charsetListStr = ''
    if not restoreMode:  # normal mode
        for charset in charsets:
            print('\n'+'['+charsets[charset]+']'+' If charset of tags is '+ charset+ ':')
            for song, title, artist, album in zip(songs, titles, artists, albums):
                sys.stdout.write(song)
                outlst = [' ', preview(title, charset),
                          ' ', preview(artist, charset),
                          ' ', preview(album, charset)]
                print("".join(outlst))
    else:  # restore mode
        for wrongCharset in charsets:  # charset the file was wrongly converted to
            for tagCharset in charsets:  # right charset of the tag
                if wrongCharset == tagCharset:
                    continue
                pairKey = charsets[wrongCharset]+charsets[tagCharset]
                print('\n'+'['+pairKey+']',
                      ' If charset is '+ tagCharset+' (wrongly converted to '+ wrongCharset+ ')',':')
                for i in range(len(songs)):
                    try:
                        artist = artists[i].encode(wrongCharset).decode(tagCharset)
                        album = albums[i].encode(wrongCharset).decode(tagCharset)
                        title = titles[i].encode(wrongCharset).decode(tagCharset)
                        print(songs[i],' | Title:',
                              title,' Artist:',
                              artist,' Album:',
                              album)
                    except UnicodeError:
                        # This pair of charsets cannot represent the text.
                        print("ERROR:Can't encode tags of "+songs[i])
                charsetListStr += " '"+pairKey+"' - "+tagCharset+' converted to '+wrongCharset+'\n'

    print('\n',"Select charset:\n",charsetListStr, "'s' - skip this file(s)")
    if len(tags) >1:
        print("'m' - manual for every file")

    while 1:
        # get the user's choice and update the tags
        choise = input()
        if not restoreMode:  # normal mode
            if choise in charsetByKey:
                updateTags(tags, titles, artists, albums, charsetByKey[choise])
                break
        else:  # restore mode
            if (len(choise) == 2
                    and choise[0] in charsetByKey
                    and choise[1] in charsetByKey):
                charset = charsetByKey[choise[1]]
                wrongCharset = charsetByKey[choise[0]]
                updateTags(tags, titles, artists, albums, charset, wrongCharset)
                break
        if choise == 's':
            return
        if choise == 'm' and len(tags) >1:
            for i in range(len(tags)):
                askUser([tags[i]], [songs[i]], [titles[i]], [artists[i]], [albums[i]])
            return
        # reached only when the answer was not understood
        print('What?')
# --- Command-line handling and main loop ---------------------------------
# Every argument is either a help/mode switch or a directory to process.
argsFailed = False
rootdirs = []
argv = sys.argv[1:]  # arguments without the program name
for arg in argv:
    if arg in ('--usage', '--help', '--version'):
        print(helptext)
        sys.exit()
    elif arg == '--restore':
        restoreMode = True
    elif os.path.isdir(arg):
        rootdirs.append(arg)
    # this is needed because paths may start in the working dir or in the root dir
    elif os.path.isdir(os.path.join(os.getcwd(), arg)):
        rootdirs.append(os.path.join(os.getcwd(), arg))
    else:
        # Report the offending argument itself (the old code indexed
        # sys.argv with an undefined name and crashed with NameError).
        print("Not right argument '",arg,"' It's not a directory.\n Try ",sys.argv[0], " --usage")
        argsFailed = True

# All bad arguments are reported before giving up.
if argsFailed:
    sys.exit()

if rootdirs == []:
    # No directory given: fall back to the current working directory.
    rootdirs = [os.getcwd()]
    print('Starting search in the ',os.getcwd())

# passDir handles one directory level; os.walk supplies every subdirectory.
for rootdir in rootdirs:
    for root, dirs, files in os.walk(rootdir):
        passDir(root)

print('Scanned files: ',scanned, ' Updated: ',updated)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment