Skip to content

Instantly share code, notes, and snippets.

@leporel
Created November 16, 2022 12:21
Show Gist options
  • Save leporel/43ad6a1cf0581177d4900aa1822e7b0b to your computer and use it in GitHub Desktop.
Save leporel/43ad6a1cf0581177d4900aa1822e7b0b to your computer and use it in GitHub Desktop.
Python3 mp3 cyrillic tags to utf8
#!/usr/bin/python
import os.path
import re
import os
import sys
import copy
# Help/usage text; the script prints it verbatim when it is started with
# --help, --version or --usage.
helptext = """
Tool for encoding tags of mp3 files in the russian 1-byte charsets to unicode
You need to install python-eyed3 package.
pip install eyed3
Note that on Windows, you also need to install the libmagic binaries.
pip install python-magic-bin
Usage: tag2utf [DIRECTORIES]
(By default files will be searched in the current dirrectory)
Modes:
--restore : programm will try to restore tags, that was broken by not right user choise
--help, --version, --usage - view this text
Version 0.16
Author: Kopats Andrei
hlamer@tut.by
Bugfix: Yarmak Vladislav
chayt@smtp.ru
Modified: leporel
github.com/leporel
This program is distributed under the terms of the GPL License.
TODO:
undo changes,
Charsets will be in the config file or command line, for encoding not only from cp1251 and koi8-r
GUI
If you need to encode tags from different charset using this version, you can modify script, it's very easy to do.
"""
# Supported legacy 1-byte encodings, each mapped to the one-letter key the
# user types to pick it.  Extend this table to decode tags from other
# encodings.
charsets = {
    'cp1251': 'c',
    'koi8-r': 'k',
}

# Default mode: just convert tags that hold 1-byte encoded text now.
# Restore (backup) mode: repair tags that were converted with the wrong
# charset, optionally decoding them to the right one.
restoreMode = False

# Running totals, printed once all directories have been processed.
scanned = updated = 0
# eyed3 is a third-party dependency.  When it cannot be imported (the help
# text notes that on Windows the libmagic binaries are needed as well), stop
# with a readable hint instead of a traceback.
try:
    import eyed3
    from eyed3 import id3
except ImportError as importError:
    # Only import failures are handled: a bare ``except`` would also swallow
    # KeyboardInterrupt/SystemExit and disguise unrelated errors.
    print('You need to install python-eyed3 package.')
    print('Import error:', importError)
    # Non-zero status so a calling shell can tell that start-up failed.
    sys.exit(1)
# Matches file names that end with an ".mp3" extension in any letter case.
# The dot is required so that names which merely end in the letters "mp3"
# are not treated as audio files, and IGNORECASE also accepts mixed-case
# extensions such as ".Mp3".
mp3FileName = re.compile(r'.*\.mp3$', re.IGNORECASE)
def recodingNeed(strs):
    """Tell whether any of the given strings needs recoding.

    Recoding is needed when a string holds a character whose code point is
    greater than 127 and less than 256, i.e. a non-ASCII character that
    still fits into a single byte.

    strs -- iterable of strings.  A single string is accepted as well; it
            is then examined character by character.

    Returns True if at least one such character is found, otherwise False
    (also for an empty iterable or for empty strings).
    """
    return any(127 < ord(char) < 256 for string in strs for char in string)
def passDir(rootdir):
    """Collect the mp3 files of one directory that need work and ask the user.

    Only the entries directly inside *rootdir* are examined.  Every regular
    file whose name matches ``mp3FileName`` is parsed with ``id3.Tag``; each
    successfully parsed file increments the module-level ``scanned``
    counter.  A file is queued when its title, artist or album needs
    recoding (see ``recodingNeed``) or when restore mode is on, provided
    the file is writable.  If anything was queued, ``askUser`` is called
    with the collected tags, file names and tag texts.

    rootdir -- path of the directory to scan.

    Returns None.
    """
    global scanned
    tags = []
    songs = []
    titles = []
    artists = []
    albums = []
    for song in os.listdir(rootdir):
        filename = os.path.join(rootdir, song)
        if not (os.path.isfile(filename) and mp3FileName.match(song)):
            continue
        tag = id3.Tag()
        try:
            if not tag.parse(filename):
                continue  # parse() reported failure: something is wrong with this file
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that Ctrl-C
            # (KeyboardInterrupt) and SystemExit are not mistaken for a
            # corrupted tag and silently swallowed.
            print('\n', filename, ':error, may be tag is corrupted.\n ')
            continue
        scanned += 1
        title = getTitle(tag)
        artist = getArtist(tag)
        album = getAlbum(tag)
        if not (restoreMode or recodingNeed([title, artist, album])):
            continue
        if not os.access(filename, os.W_OK):
            print('Warning! Have not access for writing file ',
                  filename, ' Sciped!')
            continue
        tags.append(tag)
        songs.append(song)
        if restoreMode:
            # restore mode works on the texts exactly as the tag returns them
            titles.append(title)
            artists.append(artist)
            albums.append(album)
        else:
            # normal mode
            titles.append(getTagStr(title))
            artists.append(getTagStr(artist))
            albums.append(getTagStr(album))
    if tags:
        print(len(tags), ' file(s) finded in the ', rootdir)
        askUser(tags, songs, titles, artists, albums)
def getTitle(tagObject):
    """Return the title of *tagObject*, or '' when the tag has none.

    The value is read through the public ``title`` property instead of the
    underscore-prefixed accessor, so this helper does not rely on eyed3
    internals.  None is normalised to an empty string so that callers can
    always iterate over the result.
    """
    title = tagObject.title
    return '' if title is None else title
def getArtist(tagObject):
    """Return the artist of *tagObject*, or '' when the tag has none.

    The value is read through the public ``artist`` property instead of the
    underscore-prefixed accessor, so this helper does not rely on eyed3
    internals.  None is normalised to an empty string so that callers can
    always iterate over the result.
    """
    artist = tagObject.artist
    return '' if artist is None else artist
def getAlbum(tagObject):
    """Return the album of *tagObject*, or '' when the tag has none.

    The value is read through the public ``album`` property instead of the
    underscore-prefixed accessor, so this helper does not rely on eyed3
    internals.  None is normalised to an empty string so that callers can
    always iterate over the result.
    """
    album = tagObject.album
    return '' if album is None else album
def getTagStr(tagUnicStr):
    """Return the text of a tag field as one plain ``str``.

    The previous implementation rebuilt every character below 256 with
    ``chr(ord(c))`` and copied all other characters unchanged.  On Python 3
    ``chr(ord(c))`` is ``c`` itself, so both branches produced the same
    character and the result always equalled the input text.  The no-op
    loop is therefore reduced to a single join; the function is kept so
    existing call sites continue to work.

    tagUnicStr -- the text returned by one of the tag getters.

    Returns a string made of the same characters in the same order.
    """
    return ''.join(tagUnicStr)
def updateTags (tags,titles,artists,albums,charset, wrongCharset = ''):
global updated
for i in range (len(tags)):
if not ( restoreMode and wrongCharset != ''): #normal mode
if (recodingNeed(artists[i])):
tags[i]._setArtist(artists[i].decode(charset))
else:
tags[i]._setArtist(artists[i])
if (recodingNeed(albums[i])):
tags[i]._setAlbum(albums[i].decode(charset))
else:
tags[i]._setAlbum(albums[i])
if (recodingNeed(titles[i])):
tags[i]._setTitle(titles[i].decode(charset))
else:
tags[i]._setTitle(titles[i])
else: #restore mode
tags[i]._setArtist(artists[i].encode(wrongCharset).decode(charset))
tags[i]._setAlbum (albums[i].encode(wrongCharset).decode(charset))
tags[i]._setTitle (titles[i].encode(wrongCharset).decode(charset))
tags[i].save(version=id3.ID3_V2_4,encoding='utf-8')
updated+=1
def askUser(tags, songs, titles, artists, albums):
    """Preview the candidate conversions, read the user's choice and apply it.

    tags, songs, titles, artists, albums -- parallel lists describing the
    files collected in one directory (tag object, file name, tag texts).

    Normal mode: for every charset in ``charsets`` the texts are shown as
    they would read if decoded with that charset; the user answers with the
    one-letter key of the charset.
    Restore mode: for every ordered pair of different charsets the texts
    are shown re-encoded with the "wrong" charset and decoded with the
    right one; the user answers with two letters, wrong charset key first.
    In both modes 's' skips the files and, when several files are listed,
    'm' asks again for every file separately.  Any other answer prints
    'What?' and the question is repeated.  A closed standard input is
    treated like 's'.

    The chosen conversion is carried out by ``updateTags``.  Returns None.
    """
    def _preview(text, charset):
        # Python 3 ``str`` has no .decode(); the characters in 128..255 are
        # taken to be the raw bytes of the tag, and latin-1 maps those code
        # points one-to-one back onto bytes.
        if recodingNeed(text):
            return text.encode('latin-1').decode(charset)
        return text

    # Reverse lookup: key typed by the user -> charset name.  setdefault
    # keeps the first charset should two entries ever share a key.
    keyToCharset = {}
    for name, key in charsets.items():
        keyToCharset.setdefault(key, name)

    charsetListStr = ''
    if not restoreMode:  # normal mode
        for charset in charsets:
            print('\n' + '[' + charsets[charset] + ']'
                  + ' If charset of tags is ' + charset + ':')
            for song, title, artist, album in zip(songs, titles, artists, albums):
                try:
                    print(' '.join([song,
                                    _preview(title, charset),
                                    _preview(artist, charset),
                                    _preview(album, charset)]))
                except UnicodeError:
                    # this candidate charset cannot represent the text
                    print("ERROR:Can't encode tags of " + song)
    else:  # backup (restore) mode
        for wrongCharset in charsets:  # charset the file was wrongly converted to
            for tagCharset in charsets:  # right charset of the tag
                if wrongCharset == tagCharset:
                    continue
                choiceKey = charsets[wrongCharset] + charsets[tagCharset]
                print('\n' + '[' + choiceKey + ']',
                      ' If charset is ' + tagCharset
                      + ' (wrongly converted to ' + wrongCharset + ')', ':')
                for song, title, artist, album in zip(songs, titles, artists, albums):
                    try:
                        fixedArtist = artist.encode(wrongCharset).decode(tagCharset)
                        fixedAlbum = album.encode(wrongCharset).decode(tagCharset)
                        fixedTitle = title.encode(wrongCharset).decode(tagCharset)
                        print(song, ' | Title:',
                              fixedTitle, ' Artist:',
                              fixedArtist, ' Album:',
                              fixedAlbum)
                    except UnicodeError:
                        print("ERROR:Can't encode tags of " + song)
                charsetListStr += (" '" + choiceKey + "' - " + tagCharset
                                   + ' converted to ' + wrongCharset + '\n')

    print('\n', "Select charset:\n", charsetListStr, "'s' - skip this file(s)")
    if len(tags) > 1:
        print("'m' - manual for every file")

    while True:
        # get the user's choice and update the tags
        try:
            choise = input()
        except EOFError:
            return  # standard input is closed: nothing can be asked, so skip
        if not restoreMode:  # normal mode
            if choise in keyToCharset:
                updateTags(tags, titles, artists, albums, keyToCharset[choise])
                return
        elif (len(choise) == 2
              and choise[0] in keyToCharset
              and choise[1] in keyToCharset):  # restore mode
            updateTags(tags, titles, artists, albums,
                       keyToCharset[choise[1]], keyToCharset[choise[0]])
            return
        if choise == 's':
            return
        if choise == 'm' and len(tags) > 1:
            for tag, song, title, artist, album in zip(tags, songs, titles,
                                                       artists, albums):
                askUser([tag], [song], [title], [artist], [album])
            return
        # reached only when the answer was not understood
        print('What?')
# ---- Script body: parse the command line, then scan the requested trees ----
argsFailed = False
rootdirs = []
argv = sys.argv[1:]  # the arguments without the program name
for arg in argv:
    if arg in ('--usage', '--help', '--version'):
        print(helptext)
        sys.exit()
    elif arg == '--restore':
        # module-level flag read by passDir, askUser and updateTags
        restoreMode = True
    elif os.path.isdir(arg):
        # isdir() already resolves a relative path against the working
        # directory, so no separate check with os.getcwd() is needed.
        rootdirs.append(arg)
    else:
        # Report the offending argument itself (the old code indexed
        # sys.argv with an undefined name and died with a NameError).
        print("Not right argument '", arg, "' It's not a directory.\n Try ", sys.argv[0], " --usage")
        argsFailed = True
if argsFailed:
    # non-zero status: the command line was not accepted
    sys.exit(1)
if not rootdirs:
    # no directory given: search the current working directory
    rootdirs = [os.getcwd()]
    print('Starting search in the ', os.getcwd())
for rootdir in rootdirs:
    # passDir handles one directory level, os.walk supplies every sub-directory
    for root, _dirs, _files in os.walk(rootdir):
        passDir(root)
print('Scanned files: ', scanned, ' Updated: ', updated)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment