Skip to content

Instantly share code, notes, and snippets.

@jeamland
Last active September 14, 2021 21:29
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jeamland/c856e9993008c9611a9910a3b22f9479 to your computer and use it in GitHub Desktop.
Save jeamland/c856e9993008c9611a9910a3b22f9479 to your computer and use it in GitHub Desktop.
iTunes Library (non-XML) playlist extractor
#!/usr/bin/env python
# Extract playlists from a non-XML iTunes Library file (.itl)
# Copyright (c) 2018 Benno Rice, released under the BSD (2 Clause) Licence.
# Important information on the encryption used in the .itl file found here:
# https://mrexodia.cf/reversing/2014/12/16/iTunes-Library-Format-1
# Highly useful information on the .itl format itself found here:
# https://github.com/josephw/titl/blob/master/titl-core/src/main/java/org/kafsemo/titl/ParseLibrary.java
import argparse
import collections
import csv
import enum
import io
import struct
import zlib
from Crypto.Cipher import AES
# Number of bytes at the start of the file that are kept as-is (neither
# decrypted nor decompressed) when the library is loaded.
HEADER_LENGTH = 0x90

# AES key used to decrypt the encrypted part of the library file.
CRYPTO_KEY = b'BHUILuilfghuila3'

# One namedtuple per record type the parser understands.  Each class is named
# after the record's four-character tag.  Types created with no fields are
# recognised by the parser but none of their contents are extracted.
Hdfm = collections.namedtuple('Hdfm', field_names=[
    'file_length',
    'version',
])
Hdsm = collections.namedtuple('Hdsm', field_names=[
    'block_type',
    'block_length',
])
Hghm = collections.namedtuple('Hghm', field_names=[])
Hohm = collections.namedtuple('Hohm', field_names=[
    'record_length',
    'type',
    'data',
])
# The typename must match the variable name so repr() and __name__ are right
# (this one used to be created as 'Hghm').
Halm = collections.namedtuple('Halm', field_names=[])
Haim = collections.namedtuple('Haim', field_names=[])
Hilm = collections.namedtuple('Hilm', field_names=[])
Hiim = collections.namedtuple('Hiim', field_names=[])
Htlm = collections.namedtuple('Htlm', field_names=[])
# Only the leading fields of a track record are extracted.  Fields known to
# exist but not parsed yet: file_type, playtime, track_number, track_total,
# year, bit_rate, sample_rate, volume_adjustment, start_time, end_time,
# play_count, compilation, last_played, disk_number, disk_total, rating,
# added.
Htim = collections.namedtuple('Htim', field_names=[
    'record_length',
    'sub_blocks',
    'song_id',
    'block_type',
])
Hqlm = collections.namedtuple('Hqlm', field_names=[])
# Typename fixed here as well (it used to be created as 'Hqlm').
Hqim = collections.namedtuple('Hqim', field_names=[])
Hsts = collections.namedtuple('Hsts', field_names=[])
Hplm = collections.namedtuple('Hplm', field_names=[])
Hpim = collections.namedtuple('Hpim', field_names=[
    'item_count',
])
Hptm = collections.namedtuple('Hptm', field_names=[
    'key',
])
Hslm = collections.namedtuple('Hslm', field_names=[])
Hpsm = collections.namedtuple('Hpsm', field_names=[])
Hrlm = collections.namedtuple('Hrlm', field_names=[])
Hrpm = collections.namedtuple('Hrpm', field_names=[])
class HohmType(enum.IntEnum):
    """Type codes of the ``hohm`` records whose text this script uses."""

    # Text attached to a track.
    TITLE = 0x02
    ALBUM_TITLE = 0x03
    ARTIST = 0x04

    # Text attached to a playlist.
    PLAYLIST_TITLE = 0x64
# hohm type codes whose payload is kept whole; for every other type the
# parser drops the first 16 bytes of the payload before decoding it.
HOHM_ODD_TYPES = (
    0x42,
    0x68, 0x69, 0x6a, 0x6b, 0x6c,
    0x192,
    0x1f7, 0x1f4,
    0x202,
    0x320,
)
class ItlIO(io.BytesIO):
    """In-memory byte stream with helpers for reading .itl primitives.

    The ``flipped`` attribute selects the byte order used by
    :meth:`read_uint`: little-endian when true, big-endian otherwise.
    It starts out false.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.flipped = False

    def skip(self, nbytes):
        """Advance past ``nbytes`` bytes by reading and discarding them."""
        self.read(nbytes)

    def read_ascii(self, nbytes):
        """Read ``nbytes`` bytes and return them decoded as ASCII text."""
        raw = self.read(nbytes)
        return raw.decode('ascii')

    def read_byte(self):
        """Read one byte and return it as an integer."""
        chunk = self.read(1)
        return chunk[0]

    def read_uint(self):
        """Read a 4-byte unsigned integer in the current byte order."""
        byte_order = '<' if self.flipped else '>'
        (value,) = struct.unpack(byte_order + 'I', self.read(4))
        return value
class RecordParser:
    """Iterate over the records in the bytes of an .itl library.

    Every record starts with a four-character type tag followed by a 4-byte
    header length that counts the tag and the length field themselves.  The
    rest of the header is handed to the ``parse_<tag>`` method for that
    record type.  Some handlers read further bytes from the main stream when
    the record carries a body after its header.
    """

    def __init__(self, data):
        self.data = ItlIO(data)

    def parse(self):
        """Yield one namedtuple per record until the stream is exhausted.

        Raises:
            ValueError: if a record type has no handler, or a record header
                declares a length too small to hold its own tag and length.
        """
        while True:
            offset = self.data.tell()
            record_type = self.data.read_ascii(4)
            if not record_type:
                return
            if self.data.flipped:
                record_type = record_type[::-1]
            handler = getattr(self, f'parse_{record_type}', None)
            if handler is None:
                # The tag may be stored byte-reversed; if the reversed tag
                # is known, read everything from here on as "flipped".
                handler = getattr(self, f'parse_{record_type[::-1]}', None)
                if handler is None:
                    # Report where parsing failed rather than dumping the
                    # whole remaining buffer to stdout.
                    raise ValueError(
                        f"unknown record type: {record_type!r} "
                        f"at offset {offset}")
                self.data.flipped = True
            length = self.data.read_uint()
            if length < 8:
                # A negative size passed to read() would silently swallow
                # the rest of the stream.
                raise ValueError(
                    f"record {record_type!r} at offset {offset} has "
                    f"invalid header length {length}")
            data = ItlIO(self.data.read(length - 8))
            data.flipped = self.data.flipped
            yield handler(data)

    def _body_length(self, record_length, data):
        """Return how many bytes of a record follow its header.

        ``data`` holds the header without the 8 bytes of tag and length, so
        the header size is ``len(data.getvalue()) + 8``.

        Raises:
            ValueError: if ``record_length`` is smaller than the header.
        """
        remaining = record_length - len(data.getvalue()) - 8
        if remaining < 0:
            raise ValueError(
                f"record length {record_length} is smaller than its header")
        return remaining

    def parse_hdfm(self, data):
        file_length = data.read_uint()
        data.skip(4)
        # The version string is prefixed with a single length byte.
        version_length = data.read_byte()
        version = data.read_ascii(version_length)
        return Hdfm(file_length=file_length, version=version)

    def parse_hdsm(self, data):
        record_length = data.read_uint()
        block_type = data.read_uint()
        if block_type in (4, 22):
            # The bodies of these block types are skipped entirely.
            self.data.skip(self._body_length(record_length, data))
        return Hdsm(block_type=block_type, block_length=record_length)

    def parse_hghm(self, data):
        return Hghm()

    def parse_hohm(self, data):
        record_length = data.read_uint()
        hohm_type = data.read_uint()
        hohm_data = self.data.read(self._body_length(record_length, data))
        if hohm_type not in HOHM_ODD_TYPES:
            hohm_data = hohm_data[16:]
        # What even is character encoding?
        # There might be something telling us what the encoding is but this
        # is sufficient for current purposes: an even-length payload that
        # starts or ends with a zero byte is treated as UTF-16.
        could_be_utf16 = len(hohm_data) > 1 and len(hohm_data) % 2 == 0
        if could_be_utf16 and hohm_data[0] == 0:
            hohm_data = hohm_data.decode('utf-16be')
        elif could_be_utf16 and hohm_data[-1] == 0:
            hohm_data = hohm_data.decode('utf-16le')
        else:
            hohm_data = hohm_data.decode('iso-8859-1')
        return Hohm(record_length=record_length, type=hohm_type,
                    data=hohm_data)

    def parse_halm(self, data):
        return Halm()

    def parse_haim(self, data):
        return Haim()

    def parse_hilm(self, data):
        return Hilm()

    def parse_hiim(self, data):
        return Hiim()

    def parse_htlm(self, data):
        return Htlm()

    def parse_htim(self, data):
        record_length = data.read_uint()
        sub_blocks = data.read_uint()
        song_id = data.read_uint()
        block_type = data.read_uint()
        return Htim(record_length, sub_blocks, song_id, block_type)

    def parse_hqlm(self, data):
        return Hqlm()

    def parse_hqim(self, data):
        return Hqim()

    def parse_hsts(self, data):
        return Hsts()

    def parse_hplm(self, data):
        return Hplm()

    def parse_hpim(self, data):
        data.skip(4 + 4)
        item_count = data.read_uint()
        return Hpim(item_count)

    def parse_hptm(self, data):
        data.skip(16)
        key = data.read_uint()
        return Hptm(key)

    def parse_hslm(self, data):
        return Hslm()

    def parse_hpsm(self, data):
        return Hpsm()

    def parse_hrlm(self, data):
        return Hrlm()

    def parse_hrpm(self, data):
        return Hrpm()
def load_library(filename):
    """Read an .itl file and return its header plus the decoded body.

    So it appears that the .itl format, in modern versions of iTunes, has a
    header block containing some information, one part of which tells us how
    much of the following data is AES/ECB encrypted with a key that's made it
    around the Internet a bit.  To get at the actual data you need to decrypt
    that bit in place then decompress (zlib) the bit after the initial
    header.  After that it's a similar format to older iTunes library files.

    Raises:
        ValueError: if the file is shorter than the fixed-size header.
    """
    with open(filename, 'rb') as library_file:
        itl = library_file.read()
    if len(itl) < HEADER_LENGTH:
        raise ValueError(f'{filename}: too short to be an iTunes library')
    header = itl[:HEADER_LENGTH]
    body = itl[HEADER_LENGTH:]
    max_crypt_length = struct.unpack('>I', header[0x5C:0x60])[0]
    # ECB decryption only accepts whole 16-byte blocks.  Clamp to the data
    # actually present and round down, so a file shorter than the limit in
    # the header no longer fails with "multiple of 16 in length".
    crypt_length = min(len(body), max_crypt_length) & ~0xf
    cipher = AES.new(CRYPTO_KEY, AES.MODE_ECB)
    decrypted = cipher.decrypt(body[:crypt_length])
    return header + zlib.decompress(decrypted + body[crypt_length:])


def collect_library(records):
    """Group parsed records into tracks and playlists.

    A track starts at each Htim record and a playlist at each Hpim record;
    the Hohm and Hptm records that follow fill in the current one.

    Returns:
        A ``(tracks, playlists)`` pair: tracks keyed by song id and
        playlists keyed by title.
    """
    tracks = {}
    playlists = {}
    track = {}
    playlist = {}

    def store_track():
        # Text records seen before the first Htim carry no song id.
        if 'song_id' in track:
            tracks[track['song_id']] = track

    def store_playlist():
        # Playlists are selected by title, so untitled ones are dropped.
        if 'title' in playlist and 'items' in playlist:
            playlists[playlist['title']] = playlist

    for record in records:
        if isinstance(record, Htim):
            store_track()
            track = {'song_id': record.song_id}
        elif isinstance(record, Hohm):
            if record.type == HohmType.TITLE:
                track['title'] = record.data
            elif record.type == HohmType.ALBUM_TITLE:
                track['album'] = record.data
            elif record.type == HohmType.ARTIST:
                track['artist'] = record.data
            elif record.type == HohmType.PLAYLIST_TITLE:
                playlist['title'] = record.data
        elif isinstance(record, Hpim):
            store_playlist()
            playlist = {'items': []}
        elif isinstance(record, Hptm):
            playlist.setdefault('items', []).append(record.key)
    store_track()
    store_playlist()
    return tracks, playlists


def normalise_playlist_title(title):
    """Return ``title`` as 'YYYY-MM', or None if it is not a wanted playlist.

    The playlists I was after had titles of the form 'YYYY-M' or 'YYYY-MM'
    and I wanted to make them consistently 'YYYY-MM'.
    """
    if len(title) < 5 or title[0] != '2' or title[4] != '-':
        return None
    year, _, month = title.partition('-')
    if not month.isdecimal():
        # Rejects things like '2018-1-2' or '2018-xx' instead of crashing.
        return None
    return f'{year}-{int(month):02d}'


def write_playlists(playlists, tracks, out_path='playlists.csv'):
    """Write one CSV row per track of every 'YYYY-MM' playlist."""
    # newline='' is what the csv module asks for on files it writes to.
    with open(out_path, 'w', newline='') as csv_file:
        output = csv.writer(csv_file)
        for title, playlist in playlists.items():
            title = normalise_playlist_title(title)
            if title is None:
                continue
            for key in playlist['items']:
                item = tracks.get(key)
                if item is None:
                    # The playlist refers to a track that was never seen.
                    continue
                print(repr(item))
                output.writerow([
                    title,
                    item.get('title', ''),
                    item.get('artist', ''),
                    item.get('album', ''),
                ])


def main():
    """Extract the playlists of the library named on the command line."""
    parser = argparse.ArgumentParser()
    parser.add_argument('filename', nargs='?', default='iTunes Library.itl',
                        help='iTunes Library Filename')
    args = parser.parse_args()
    itl = load_library(args.filename)
    tracks, playlists = collect_library(RecordParser(itl).parse())
    write_playlists(playlists, tracks)


if __name__ == '__main__':
    main()
@jacyap
Copy link

jacyap commented May 24, 2020

Hi, I'm trying to run this but it gives the errors -

Traceback (most recent call last):
File "playlist_extractor.py", line 294, in
for record in RecordParser(itl).parse():
File "playlist_extractor.py", line 153, in parse
raise ValueError(f"unknown record type: {record_type}")
ValueError: unknown record type: mx?<

How do I solve this?

@momotaro1966
Copy link

I'm finding an invalid syntax error in line 147: method = f'parse_{record type}'

Any suggestions on how to resolve this?

@jeffreykog
Copy link

I'm finding an invalid syntax error in line 147: method = f'parse_{record type}'

Any suggestions on how to resolve this?

@momotaro1966
The f-string syntax is only supported starting from Python 3.6, so you need at least Python 3.6 to run it.

@ebichu
Copy link

ebichu commented Sep 14, 2021

Doesn't work anymore, got this error. My understanding of Python is very limited but seems to need some padding? Tried some hacks but to no avail.
decrypted = cipher.decrypt(itl[HEADER_LENGTH:max_crypt_length + HEADER_LENGTH])
File "/usr/local/lib/python3.9/site-packages/Crypto/Cipher/blockalgo.py", line 295, in decrypt
return self._cipher.decrypt(ciphertext)
ValueError: Input strings must be a multiple of 16 in length

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment