Created
September 1, 2011 20:14
-
-
Save melissaboiko/1187140 to your computer and use it in GitHub Desktop.
Python script to convert ID3 tag fields to Unicode (UTF-8)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2.6
# -*- coding: utf-8 -*-
# TODO: doesn't convert to ID3v2; eyeD3 and mutagen have been fussy
# with non-UTF tags
# TODO: too many ad-hoc print calls scattered everywhere
import sys | |
from ID3 import * | |
import mutagen.id3 | |
import chardet | |
def convert(id3_tags, fromenc):
    """Re-encode every tag value from ``fromenc`` to UTF-8 and save the tags.

    Parameters:
        id3_tags: mapping-like tag object. It must provide ``items()`` and
            item assignment; ``write()`` is called only when something
            changed. (Presumably an ``ID3`` instance -- that is what the
            script's entry point passes in.)
        fromenc: name of the codec the current tag values are assumed to be
            encoded in.

    Returns:
        True if at least one value was rewritten (``write()`` was called),
        False if every value was already identical to its UTF-8 form.

    Raises:
        LookupError: propagated from ``decode`` when ``fromenc`` is not a
            known codec name and there is at least one tag to convert.
    """
    changed = False
    # Iterate over a snapshot: values are reassigned inside the loop.
    for k, v in list(id3_tags.items()):
        try:
            uv = v.decode(fromenc).encode('utf-8')
        except UnicodeDecodeError:
            # Best effort: warn, then drop the bytes that are invalid in
            # ``fromenc`` instead of giving up on the whole value.
            # (Parenthesised single-argument print behaves the same as the
            # Python 2 print statement.)
            print("WARNING: %s = '%s': Error reading input as %s; ignoring illegal characters" % (k, v, fromenc))
            uv = v.decode(fromenc, 'ignore').encode('utf-8')
        if v == uv:
            # Already byte-identical to its UTF-8 form; nothing to do.
            continue
        id3_tags[k] = uv
        print("Changing: %s = '%s' → '%s'" % (k, v, uv))
        changed = True
    # Write once per file, and only when a value really changed.
    if changed:
        id3_tags.write()
    return changed
def convert_auto(id3_tags):
    """Guess each tag value's encoding with chardet and re-encode it as UTF-8.

    A value is rewritten only when chardet's confidence is at least 0.5 and
    the detected encoding is neither ASCII nor UTF-8. Values whose encoding
    cannot be detected, or that fail to decode with the detected codec, are
    reported and left untouched.

    Parameters:
        id3_tags: mapping-like tag object providing ``items()``, item
            assignment and ``write()`` (presumably an ``ID3`` instance --
            that is what the script's entry point passes in).

    Returns:
        True if at least one value was rewritten (``write()`` was called),
        False otherwise.
    """
    changed = False
    # Iterate over a snapshot: values are reassigned inside the loop.
    for k, v in list(id3_tags.items()):
        det = chardet.detect(v)
        confidence = det['confidence']
        # chardet reports encoding=None when it cannot classify the input
        # (e.g. an empty value); substitute a placeholder so .lower() and
        # the messages below keep working.
        enc = (det['encoding'] or 'unknown').lower()
        if confidence < 0.5:
            print("Not changing: Confidence too low: %s, %s certain" % (enc, confidence))
            continue
        if enc in ('ascii', 'utf-8'):
            # Already representable as UTF-8 as-is.
            continue
        try:
            uv = v.decode(enc).encode('utf-8')
        except (UnicodeDecodeError, LookupError):
            # The guess was wrong, or chardet named a codec this Python
            # does not ship; skip this value rather than abort the file.
            print("Not changing: %s = '%s': cannot decode as %s" % (k, v, enc))
            continue
        id3_tags[k] = uv
        print("Changed: %s = '%s' → '%s' (%s, %s certain)" % (k, v, uv, enc, confidence))
        changed = True
    # Write once per file, and only when a value really changed.
    if changed:
        id3_tags.write()
    return changed
def print_detections(id3_tags):
    """Print chardet's encoding guess for every tag value; change nothing.

    Parameters:
        id3_tags: mapping-like tag object providing ``items()`` (presumably
            an ``ID3`` instance -- that is what the script's entry point
            passes in).

    Returns:
        None. One line per tag is printed: key, raw value, guessed encoding
        and confidence.
    """
    for k, v in id3_tags.items():
        det = chardet.detect(v)
        confidence = det['confidence']
        # chardet reports encoding=None when it cannot classify the input
        # (e.g. an empty value); show a placeholder instead of crashing on
        # None.lower().
        enc = (det['encoding'] or 'unknown').lower()
        print("%s = '%s' (%s, %s certain)" % (k, v, enc, confidence))
if __name__ == '__main__':
    # Command-line entry point: for each file named on the command line,
    # either print the detected encodings (-d), convert from an explicit
    # encoding (-f ENC), or convert using automatic detection (default).
    import codecs
    from optparse import OptionParser

    p = OptionParser(usage='Usage: %prog [options] <file1.mp3 file2.mp3 ... fileN.mp3>')
    p.add_option('-f', '--from', dest='fromenc', default='auto',
                 help="Convert from this encoding (default: try to guess)")
    p.add_option('-d', '--detect-only', dest='detect',
                 action='store_true', default=False,
                 help="Don't change tags; just print results of detection")
    (options, args) = p.parse_args()

    # Reject an unknown --from codec before touching any file; otherwise the
    # mistake only surfaces as a LookupError traceback at the first tag.
    # Skipped when the option is not going to be used.
    if not options.detect and options.fromenc != 'auto':
        try:
            codecs.lookup(options.fromenc)
        except LookupError:
            # OptionParser.error prints the usage plus this message and exits.
            p.error("unknown encoding: %s" % options.fromenc)

    for fname in args:
        try:
            id3_tags = ID3(fname)
        # InvalidTagError is listed next to Exception as in the original.
        # NOTE(review): presumably it does not derive from Exception -- confirm
        # before simplifying this tuple.
        except (Exception, InvalidTagError) as e:
            # Unreadable or untagged files are reported and skipped.
            print("Ignoring file: %s: %s" % (fname, e))
            continue

        if options.detect:
            print_detections(id3_tags)
        elif options.fromenc == 'auto':
            convert_auto(id3_tags)
        else:
            convert(id3_tags, options.fromenc)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment