Last active
December 2, 2024 06:36
-
-
Save bbbradsmith/cd4a511e765889153f9dd9ad1d2eb598 to your computer and use it in GitHub Desktop.
Removes iTunes fingerprinting and metadata from M4A files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# | |
# iTunes M4A Strip | |
# | |
# Brad Smith, 2024 | |
# https://rainwarrior.ca | |
# | |
# This program searches for all M4A files under the current directory,
# removes all unnecessary metadata, keeping only the AAC music data,
# and then restores only the desired tags from the original metadata.
# Optionally it will then replace the original files. | |
# | |
# This gets rid of various information in the M4A embedded by iTunes, | |
# such as your name, e-mail address, and account information. | |
# It is not guaranteed that iTunes doesn't try to store information | |
# directly in the AAC music stream via steganography, but at least this | |
# will do a better job of "cleaning" the file than available tag editors, | |
# since much of this information was hidden in many other places aside from | |
# tag metadata. | |
# | |
# Before turning on REPLACE_ORIGINAL, try running it first to see if the | |
# stripped files are working as expected. | |
# | |
# (I don't suspect that the AAC stream is fingerprinted, as I don't feel it | |
# would be worth it for Apple. I only mention it because I can't prove that | |
# they don't. To check for yourself, try comparing your stripped files | |
# with those from another iTunes account, stripped with the same build of | |
# ffmpeg, and see if they match.) | |
# | |
# | |
# Requirements: | |
# | |
# 1. python | |
# info: https://www.python.org/ | |
# | |
# 2. music-tag python library | |
# info: https://pypi.org/project/music-tag/ | |
# install: pip install music-tag | |
# | |
# 3. ffmpeg | |
# info: https://ffmpeg.org/download.html | |
# install: download a binary and put ffmpeg on the path, or in the current directory | |
# | |
import os | |
import shlex | |
import struct | |
import filecmp | |
import subprocess | |
import music_tag | |
# options

# directory that is searched recursively (the current directory by default)
DIRECTORY = "."

# when True the stripped file takes the place of the original file
REPLACE_ORIGINAL = False

# naming convention for the stripped files (if not replacing the original)
OUTPUT_EXT = ".strip.m4a"

# music-tag tag names that we wish to keep
KEEP_TAGS = [
    "tracktitle", "artist", "album", "albumartist", "composer",
    "tracknumber", "totaltracks",
    "discnumber", "totaldiscs",
    "genre", "year", "comment", "artwork",
]

# display verbose information about file contents
DEBUG = False

# make True to avoid making any changes to disk, useful for debugging
DRY_RUN = False

# a temporary AAC file used during processing
TEMP_AAC = "temp.aac"

# command to execute ffmpeg
FFMPEG = "ffmpeg"
# | |
# functions | |
# | |
# recursively gather a list of files with the desired extension (case-insensitive)
def gather_files(prefix=".", exts=(".m4a",)):
    """Collect the files under `prefix` whose extension is listed in `exts`.

    prefix -- directory to search; its sub-directories are searched as well.
    exts   -- extensions to accept, each including the leading dot. They are
              compared case-insensitively against the file extension.

    Returns a list of paths, each one built by joining onto `prefix`.
    """
    # normalise once so callers are not required to pass lowercase extensions
    wanted = {e.lower() for e in exts}
    found = []
    for name in os.listdir(prefix):
        path = os.path.join(prefix, name)
        if os.path.isdir(path):
            found += gather_files(path, wanted)
            continue
        # take the extension from the file name only, so a dot in a directory
        # name can never be mistaken for the start of the extension
        _, dot, suffix = name.rpartition(".")
        if dot and (dot + suffix).lower() in wanted:
            found.append(path)
    return found
# debug tags found in file
def debug_tags(fn):
    """Print the music-tag tags and the raw underlying items of the file `fn`."""
    MAX_RAW_LEN = 128  # raw items are cut off after this many characters
    print("Tags:")
    tf = music_tag.load_file(fn)
    for t in tf.tag_map.keys():
        try:
            s = str(tf[t])
        except Exception:
            # sometimes str throws an exception on tags (e.g. iTunes year format);
            # Exception rather than a bare except so Ctrl-C still interrupts
            s = "<invalid>"
        print(" [" + t + "] " + s)
    print("Raw items:")  # underlying mutagen items
    for itm in tf.mfile.items():
        print(" " + str(itm)[0:MAX_RAW_LEN])
# recursively dump MP4 box structure, returns False when end of file or error reached
def debug_mp4(mf, depth=0, boxsize=-1):
    """Print one line per MP4 box read from `mf`, descending into container boxes.

    mf      -- seekable binary file object positioned at the first box to dump.
    depth   -- nesting level; only used to indent the printed lines.
    boxsize -- payload size in bytes of the enclosing box, or a negative value
               at the top level, where boxes are read until the end of the file.

    Returns True when the boxes read exactly fill an enclosing box of
    `boxsize` bytes. Returns False when the end of the file is reached at the
    top level, or when the structure is found to be corrupt (a message is
    printed in that case).
    """
    # MP4 box FCCs that can be recursed into
    MP4_RECURSE = ("MP4 ", "moov", "trak", "udta", "mdia", "edts", "minf", "dinf", "stbl")
    fpos = 0  # bytes consumed so far inside the enclosing box
    while boxsize < 0 or fpos < boxsize:
        mbpos = mf.tell()
        headbytes = mf.read(8)
        if not headbytes and boxsize < 0:
            return False  # clean end of file
        try:
            if len(headbytes) < 8:
                raise ValueError("EOF in MP4 box header")
            mbsize = struct.unpack(">L", headbytes[0:4])[0]
            headsize = 8
            if mbsize == 1:
                # size field 1: the real size is a 64-bit value after the FCC
                largebytes = mf.read(8)
                if len(largebytes) < 8:
                    raise ValueError("EOF in MP4 box header")
                mbsize = struct.unpack(">Q", largebytes)[0]
                headsize = 16
            elif mbsize == 0 and boxsize < 0:
                # size field 0 at the top level: the box runs to the end of the file
                end = mf.seek(0, os.SEEK_END)
                mf.seek(mbpos + headsize)
                mbsize = end - mbpos
            if mbsize < headsize:
                raise ValueError("Invalid MP4 box size at %08X" % (mbpos))
            if boxsize >= 0 and (fpos + mbsize) > boxsize:
                raise ValueError("MP4 box at %08X too big for enclosing box" % (mbpos))
        except ValueError as ex:
            print(" MP4 structure corrupt: " + str(ex))
            return False  # error
        # show the FCC with non-printable bytes replaced by '*'
        fcc = "".join([chr(c) if (c >= 0x20 and c < 0x7F) else '*' for c in headbytes[4:8]])
        print(" %sMP4[%s] at %08X: %d bytes" % (". " * depth, fcc, mbpos, mbsize))
        if fcc in MP4_RECURSE:
            if not debug_mp4(mf, depth + 1, mbsize - headsize):
                return False
        else:
            mf.seek(mbsize - headsize, os.SEEK_CUR)  # skip the payload
        fpos += mbsize
    return fpos == boxsize  # clean end of box
# debug information found in M4A file
def debug_m4a(fn):
    """Print the tags and the MP4 box structure of the M4A file `fn`."""
    print("Debug: " + fn)
    debug_tags(fn)
    # the with-block closes the handle even if the dump raises
    with open(fn, "rb") as mf:
        debug_mp4(mf, 0)
# strip the M4A, keep only the desired tags
def strip_m4a(fn_in, fn_out, fn_aac=TEMP_AAC, keep_tags=KEEP_TAGS, replace=REPLACE_ORIGINAL):
    """Write a stripped copy of the M4A file `fn_in` to `fn_out`.

    ffmpeg copies the audio stream (codec "copy") into the intermediate file
    `fn_aac` and from there into `fn_out`; the tags named in `keep_tags` are
    then copied over from `fn_in`.

    fn_in     -- path of the original M4A file.
    fn_out    -- path the stripped M4A file is written to.
    fn_aac    -- path of the intermediate AAC file; removed once ffmpeg is done.
    keep_tags -- music-tag tag names to carry over; empty or None keeps none.
    replace   -- when true, `fn_out` takes the place of `fn_in`, unless both
                 files have identical contents, in which case `fn_out` is
                 deleted and `fn_in` is left alone.

    Returns nothing. An Exception raised while stripping is caught and
    reported as an "Error stripping mp4" line. With DRY_RUN set, the ffmpeg
    commands are only printed.
    """
    if DEBUG:
        debug_m4a(fn_in)
    try:
        # CREATE_NO_WINDOW only exists on Windows; 0 means "no flags" elsewhere.
        # Change to 0 if you want to see the console commands.
        hide_console = getattr(subprocess, "CREATE_NO_WINDOW", 0)
        # FFMPEG is split like a command line, but file names are passed as
        # separate arguments so quotes or spaces in them cannot break the command.
        ffmpeg = shlex.split(FFMPEG)
        # use ffmpeg to make a lossless copy of the AAC data to a temporary file,
        # then back to a clean M4A
        commands = [
            ffmpeg + ["-y", "-i", fn_in, "-acodec", "copy", fn_aac],
            ffmpeg + ["-y", "-i", fn_aac, "-acodec", "copy", fn_out],
        ]
        if DRY_RUN:
            for args in commands:
                print("Command: " + " ".join(shlex.quote(a) for a in args))
            print("Dry run: " + fn_in)
            return
        try:
            for args in commands:
                result = subprocess.run(args, capture_output=True, text=True, creationflags=hide_console)
                if result.returncode != 0:
                    print(result.stderr)
                    raise Exception(" ".join(shlex.quote(a) for a in args))
        finally:
            # the intermediate file is only needed between the two ffmpeg passes
            try:
                os.remove(fn_aac)
            except FileNotFoundError:
                pass  # ffmpeg failed before creating it
            except OSError as ex:
                print("Could not remove temporary file: %s (%s)" % (fn_aac, str(ex)))
        # copy the desired tags from the original file
        if keep_tags:
            ti = music_tag.load_file(fn_in)
            to = music_tag.load_file(fn_out)
            for tag in keep_tags:
                if tag in ti.tag_map:
                    to.raw[tag] = ti.raw[tag]
            to.save()
        if DEBUG:
            debug_m4a(fn_out)
        if replace:
            if not filecmp.cmp(fn_in, fn_out, False):
                # replace original with stripped version in a single step, so a
                # failure cannot leave the original deleted but not replaced
                os.replace(fn_out, fn_in)
                print("Stripped: %s" % (fn_in))
            else:
                # stripped version is identical, leave the original alone
                os.remove(fn_out)
                print("Already stripped: %s" % (fn_in))
        else:
            print("Stripped: %s" % (fn_out))
    except Exception as ex:
        print("Error stripping mp4: %s (%s)" % (fn_in, str(ex)))
# | |
# main program | |
# | |
# collect every M4A under DIRECTORY, leaving out files already carrying the stripped suffix
fs = [
    fn
    for fn in gather_files(DIRECTORY, [".m4a"])
    if not fn.endswith(OUTPUT_EXT)
]
print("%d files found..." % (len(fs)))
for fn in fs:
    # the output sits next to the source, with its extension swapped for OUTPUT_EXT
    fo = fn[:fn.rfind('.')] + OUTPUT_EXT
    strip_m4a(fn, fo, TEMP_AAC, KEEP_TAGS, REPLACE_ORIGINAL)
print("Done.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
<3