Skip to content

Instantly share code, notes, and snippets.

@bbbradsmith
Last active December 2, 2024 06:36
Show Gist options
  • Save bbbradsmith/cd4a511e765889153f9dd9ad1d2eb598 to your computer and use it in GitHub Desktop.
Save bbbradsmith/cd4a511e765889153f9dd9ad1d2eb598 to your computer and use it in GitHub Desktop.
Removes iTunes fingerprinting and metadata from M4A files
#!/usr/bin/env python3
#
# iTunes M4A Strip
#
# Brad Smith, 2024
# https://rainwarrior.ca
#
# This program will search for all M4A files under the current directory,
# remove all unnecessary metadata, keeping only the AAC music data,
# then restore only the desired tags from the original metadata.
# Optionally it will then replace the original files.
#
# This gets rid of various information in the M4A embedded by iTunes,
# such as your name, e-mail address, and account information.
# It is not guaranteed that iTunes doesn't try to store information
# directly in the AAC music stream via steganography, but at least this
# will do a better job of "cleaning" the file than available tag editors,
# since much of this information was hidden in many other places aside from
# tag metadata.
#
# Before turning on REPLACE_ORIGINAL, try running it first to see if the
# stripped files are working as expected.
#
# (I don't suspect that the AAC stream is fingerprinted, as I don't feel it
# would be worth it for Apple. I only mention it because I can't prove that
# they don't. To check for yourself, try comparing your stripped files
# with those from another iTunes account, stripped with the same build of
# ffmpeg, and see if they match.)
#
#
# Requirements:
#
# 1. python
# info: https://www.python.org/
#
# 2. music-tag python library
# info: https://pypi.org/project/music-tag/
# install: pip install music-tag
#
# 3. ffmpeg
# info: https://ffmpeg.org/download.html
# install: download a binary and put ffmpeg on the path, or in the current directory
#
import os
import shlex
import struct
import filecmp
import subprocess
import music_tag
# options

# root folder to scan; the search recurses from here (default: current directory)
DIRECTORY = "."

# when True, the original files are replaced by their stripped versions
REPLACE_ORIGINAL = False

# naming convention for the stripped files (if not replacing the original)
OUTPUT_EXT = ".strip.m4a"

# music-tag tag names that we wish to keep
KEEP_TAGS = [
    "tracktitle", "artist", "album", "albumartist", "composer",
    "tracknumber", "totaltracks", "discnumber", "totaldiscs",
    "genre", "year", "comment", "artwork",
]

# when True, display verbose information about file contents
DEBUG = False

# when True, avoid making any changes to disk (useful for debugging)
DRY_RUN = False

# temporary AAC file used during processing
TEMP_AAC = "temp.aac"

# command to execute ffmpeg
FFMPEG = "ffmpeg"
#
# functions
#
# recursively gather a list of files with the desired extension
def gather_files(prefix=".", exts=None):
    """Recursively collect files under *prefix* whose extension is in *exts*.

    prefix: directory to search.
    exts:   extensions to accept, each including its leading dot (e.g. ".m4a").
            Matching is case-insensitive. A single string is treated as one
            extension. Defaults to [".m4a"].

    Returns a list of paths, each formed by joining *prefix* with the location
    of the file below it. The entries of every directory are visited in sorted
    order, so the result is deterministic.
    """
    if exts is None:
        # built per call so no list object is shared between calls
        exts = [".m4a"]
    elif isinstance(exts, str):
        # a bare string would otherwise be iterated character by character
        exts = [exts]
    wanted = {e.lower() for e in exts}
    found = []
    for name in sorted(os.listdir(prefix)):
        path = os.path.join(prefix, name)
        if os.path.isdir(path):
            found += gather_files(path, wanted)
            continue
        # the extension is everything from the last dot of the file name onward
        _, dot, tail = name.rpartition(".")
        if dot and (dot + tail).lower() in wanted:
            found.append(path)
    return found
# debug tags found in file
def debug_tags(fn):
    """Print the tags music-tag finds in *fn*, followed by the raw underlying items.

    fn: path of the audio file to inspect.

    A tag whose value cannot be read or converted to text is printed as
    "<invalid>" instead of aborting the listing.
    """
    MAX_RAW_LEN = 128  # each raw item is truncated to this many characters
    print("Tags:")
    tf = music_tag.load_file(fn)
    for t in tf.tag_map.keys():
        try:
            s = str(tf[t])
        except Exception:
            # sometimes str throws an exception on tags (e.g. iTunes year format);
            # only ordinary errors are absorbed, so Ctrl-C still interrupts the run
            s = "<invalid>"
        print(" [" + t + "] " + s)
    print("Raw items:")  # underlying mutagen items
    for itm in tf.mfile.items():
        print(" " + str(itm)[0:MAX_RAW_LEN])
# recursively dump MP4 box structure, returns False when end of file or error reached
def debug_mp4(mf, depth=0, boxsize=-1):
    """Print the tree of MP4 boxes read from the binary file object *mf*.

    mf:      seekable binary file object positioned at the start of a box.
    depth:   nesting level; only used to indent the printed lines.
    boxsize: number of payload bytes in the enclosing box, or a negative
             value to keep reading boxes until the end of the file.

    Returns True when the boxes read exactly fill the enclosing box, and
    False when the end of the file or a structural error is reached.

    Besides plain 32-bit sizes, a size field of 1 (the real size follows the
    type as a 64-bit value) and a size field of 0 (the box runs to the end of
    the file) are understood.
    """
    MP4_RECURSE = ("MP4 ", "moov", "trak", "udta", "mdia", "edts", "minf", "dinf", "stbl")  # MP4 box FCCs that can be recursed into
    fpos = 0  # bytes consumed so far inside the enclosing box
    while boxsize < 0 or fpos < boxsize:
        mbpos = mf.tell()
        headbytes = mf.read(8)
        if len(headbytes) == 0 and boxsize < 0:
            return False  # clean end of file
        try:
            if len(headbytes) < 8:
                raise ValueError("EOF in MP4 box header")
            mbsize = struct.unpack(">L", headbytes[0:4])[0]
            headsize = 8
            if mbsize == 1:
                # 64-bit size stored right after the box type
                largebytes = mf.read(8)
                if len(largebytes) < 8:
                    raise ValueError("EOF in MP4 box header")
                mbsize = struct.unpack(">Q", largebytes)[0]
                headsize = 16
            elif mbsize == 0:
                # box extends to the end of the file: measure it, then come back
                resume = mf.tell()
                mbsize = mf.seek(0, os.SEEK_END) - mbpos
                mf.seek(resume)
            if mbsize < headsize:
                raise ValueError("Invalid MP4 box size at %08X" % (mbpos))
            if boxsize >= 0 and (fpos + mbsize) > boxsize:
                raise ValueError("MP4 box at %08X too big for enclosing box" % (mbpos))
        except ValueError as ex:
            print(" MP4 structure corrupt: " + str(ex))
            return False  # error
        # show the four-character code, masking non-printable bytes
        fcc = "".join(chr(c) if 0x20 <= c < 0x7F else "*" for c in headbytes[4:8])
        print(" %sMP4[%s] at %08X: %d bytes" % (". " * depth, fcc, mbpos, mbsize))
        if fcc in MP4_RECURSE:
            if not debug_mp4(mf, depth + 1, mbsize - headsize):
                return False
        else:
            mf.seek(mbsize - headsize, os.SEEK_CUR)  # skip the payload
        fpos += mbsize
    return (fpos == boxsize)  # clean end of box
# debug information found in M4A file
def debug_m4a(fn):
    """Print the tags and the MP4 box structure of the file at path *fn*."""
    print("Debug: " + fn)
    debug_tags(fn)
    # the context manager closes the file handle once the dump is finished
    with open(fn, "rb") as mf:
        debug_mp4(mf, 0)
# human-readable form of the ffmpeg stream-copy command, used for messages only
def _ffmpeg_command_text(src, dst):
    return FFMPEG + " -y -i \"" + src + "\" -acodec copy \"" + dst + "\""

# run ffmpeg to copy the audio stream of src into dst without re-encoding
def _ffmpeg_copy(src, dst, creationflags=0):
    # file names are passed as separate arguments so that quotes, spaces or
    # backslashes in them cannot be misparsed; only FFMPEG itself is split
    args = shlex.split(FFMPEG) + ["-y", "-i", src, "-acodec", "copy", dst]
    result = subprocess.run(args, capture_output=True, text=True, creationflags=creationflags)
    if result.returncode != 0:
        print(result.stderr)
        raise Exception(_ffmpeg_command_text(src, dst))

# strip the M4A, keep only the desired tags
def strip_m4a(fn_in, fn_out, fn_aac=TEMP_AAC, keep_tags=KEEP_TAGS, replace=REPLACE_ORIGINAL):
    """Write a stripped copy of *fn_in* to *fn_out*, restoring only the chosen tags.

    fn_in:     path of the original M4A file.
    fn_out:    path the stripped M4A file is written to.
    fn_aac:    path of the temporary AAC file used between the two ffmpeg
               passes; it is deleted again once both passes have run.
    keep_tags: music-tag tag names to copy from the original to the stripped file.
    replace:   when true, the stripped file takes the place of the original,
               unless both are already identical, in which case the stripped
               copy is discarded.

    With DRY_RUN set, only the commands are printed and nothing is written.
    Errors are reported on the console and are not raised to the caller.
    """
    if DEBUG:
        debug_m4a(fn_in)
    try:
        # CREATE_NO_WINDOW only exists on Windows; 0 means "no special flags" elsewhere.
        # change to 0 if you want to see the console commands
        hide_console = getattr(subprocess, "CREATE_NO_WINDOW", 0)
        if DRY_RUN:
            print("Command: " + _ffmpeg_command_text(fn_in, fn_aac))
            print("Command: " + _ffmpeg_command_text(fn_aac, fn_out))
            print("Dry run: " + fn_in)
            return
        # use ffmpeg to make a lossless copy of the AAC data to a temporary file, then back to a clean M4A
        try:
            _ffmpeg_copy(fn_in, fn_aac, hide_console)
            _ffmpeg_copy(fn_aac, fn_out, hide_console)
        finally:
            # best-effort cleanup: the temporary file may not exist if the first pass failed
            try:
                os.remove(fn_aac)
            except OSError:
                pass
        # copy the desired tags from the original file
        if keep_tags:
            ti = music_tag.load_file(fn_in)
            to = music_tag.load_file(fn_out)
            for tag in keep_tags:
                if tag in ti.tag_map:
                    to.raw[tag] = ti.raw[tag]
            to.save()
        if DEBUG:
            debug_m4a(fn_out)
        if not replace:
            print("Stripped: %s" % (fn_out))
        elif filecmp.cmp(fn_in, fn_out, False):
            # stripped version is identical, leave the original alone
            os.remove(fn_out)
            print("Already stripped: %s" % (fn_in))
        else:
            # replace original with stripped version in a single step, so the
            # original is never deleted without the new file being in place
            os.replace(fn_out, fn_in)
            print("Stripped: %s" % (fn_in))
    except Exception as ex:
        print("Error stripping mp4: %s (%s)" % (fn_in, str(ex)))
#
# main program
#
# collect every M4A below DIRECTORY, leaving out files that already carry the stripped-output suffix
fs = []
for fn in gather_files(DIRECTORY, [".m4a"]):
    if fn.endswith(OUTPUT_EXT):
        continue  # skip stripped files
    fs.append(fn)
print("%d files found..." % (len(fs)))
# strip each file; the output name swaps the final extension for OUTPUT_EXT
for fn in fs:
    fo = fn.rpartition('.')[0] + OUTPUT_EXT
    strip_m4a(fn, fo, TEMP_AAC, KEEP_TAGS, REPLACE_ORIGINAL)
print("Done.")
@10kmotorola
Copy link

<3

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment