MaxEtMoritz/Muzio2LB.md

## lb_import.py
#%%
import json
import pandas as pd
import pylistenbrainz as lb
import audio_metadata as meta
from os import path
# %%
# TODO: point this at the directory on this machine that holds the audio files
# (the paths from the history export are appended to it as-is).
FILES_ROOT = r"D:\Documents\\"
# TODO: change the file name if the exported history is stored elsewhere
raw_history = pd.read_csv(r"listenbrainz_history_to_import.tsv", dialect='excel-tab')
print(len(raw_history))
# Remove rows that are exact duplicates and renumber the remaining rows.
df = raw_history.drop_duplicates(ignore_index=True)
print(len(df))

# %%
def build_meta(row: pd.Series):
    """
    Build the ListenBrainz listen metadata for one row of the play history.

    The audio file referenced by the row is looked up below ``FILES_ROOT`` and
    its tags are read. Whenever that is not possible (file missing, file not
    loadable, artist or title tag missing) the user is asked for artist and
    title via ``guess_names``.

    Args:
        row: A history row providing ``song_id``, ``path`` and ``time_played``.

    Returns:
        A dict with ``listened_at``, ``artist_name``, ``track_name`` and
        ``additional_info``, or ``None`` when the row has no file path.
    """
    pth: str = row['path']
    if pd.isna(pth):
        print('no file path for song id', row['song_id'])
        return None

    # ListenBrainz expects ``listened_at`` in Unix *seconds*, while the Muzio
    # history stores milliseconds. A value at or above 10**11 cannot be a
    # sensible seconds timestamp (that would be the year 5138 or later), so it
    # is treated as milliseconds; values already in seconds pass through.
    listened_at = row['time_played']
    if listened_at >= 10 ** 11:
        listened_at = int(listened_at // 1000)

    data = {
        'listened_at': listened_at,
        'additional_info': {
            'media_player': 'Muzio',
            'submission_client': lb.__name__,
        },
    }

    def ask_user(*reason):
        # Shared fallback: report why the tags cannot be used, then prompt.
        print(*reason)
        data['artist_name'], data['track_name'] = guess_names(pth)
        return data

    full_path = FILES_ROOT + pth
    if not path.exists(full_path):
        # try to guess artist name / track title from filename
        return ask_user('file not found: ', pth)

    # try to read file metadata
    try:
        file = meta.load(full_path)
    except (meta.FormatError, meta.UnsupportedFormat):
        return ask_user('file load failed (most likely unsupported format): ', pth)

    # The duration is recorded as soon as the file could be loaded, even if
    # the tags turn out to be unusable afterwards.
    data['additional_info']['duration'] = file['streaminfo'].duration
    tags = file.tags
    if not tags or not tags.get('artist') or not tags.get('title'):
        return ask_user('no tags found for', pth)

    data['artist_name'] = tags['artist'][0]
    data['track_name'] = tags['title'][0]

    # User-defined text frames may carry MusicBrainz identifiers.
    custom: list[meta.ID3v2UserText] = tags.get('usertext') or []
    for prop in custom:
        if prop.description == 'MusicBrainz Release Track Id':
            print('rtrack id')
            data['additional_info']['track_mbid'] = prop.text[0]
        elif prop.description == 'MusicBrainz Artist Id':
            print('artist_mbid')
            data['additional_info']['artist_mbids'] = prop.text[0].split('; ')

    return data

# Answers already confirmed by the user, keyed by file path, so that a file
# that shows up several times in the history is only asked about once.
path2data = dict()


def _guess_from_file_name(name: str):
    """Return an ``(artist, title)`` guess for a file name stem, or ``(None, None)``."""
    # Format 1: "<artist> - <title>". Only the first separator splits, so the
    # title itself may contain " - ".
    parts = name.split(' - ', 1)
    if len(parts) == 2 and parts[0]:
        return parts[0], parts[1]
    # Format 2: "<tracknum>_<artist>_<title>". Further underscores stay in the title.
    parts = name.split('_', 2)
    if len(parts) == 3 and parts[1]:
        return parts[1], parts[2]
    return None, None


def _ask(question: str, default=None, allow_quit: bool = False) -> str:
    """Prompt until a non-empty value is available; empty input accepts *default*."""
    prompt = f'{question} ({default}): ' if default else f'{question}: '
    while True:
        answer = input(prompt)
        if allow_quit and answer.lower() == '#q':
            raise KeyboardInterrupt()
        if answer or default:
            return answer or default


def guess_names(pth: str):
    """
    Prompts the user for the correct artist name and track title and tries to prefill using two formats:
    1. <artist> - <title>.mp3
    2. <tracknum>_<artist>_<title>.mp3

    Pressing enter accepts the prefilled value; without a prefilled value the
    prompt repeats until something is entered. Answers are cached per path.

    Args:
        pth: Path of the audio file, using ``/`` as separator.

    Returns:
        The ``(artist, title)`` tuple confirmed by the user.

    Raises:
        KeyboardInterrupt: If the user answers ``#q`` at the artist prompt.
    """
    cached = path2data.get(pth)
    if cached:
        return cached

    # Drop the directories and the actual extension, whatever its length
    # (".mp3", ".flac", ".opus", ...).
    file_name = pth.rsplit('/', 1)[-1]
    name = path.splitext(file_name)[0]

    artist, title = _guess_from_file_name(name)
    artist = _ask('enter artist name or #q to exit', artist, allow_quit=True)
    title = _ask('enter title', title)

    path2data[pth] = (artist, title)
    return artist, title


# %%
# Build the listen metadata row by row; rows without a file path yield None
# and are left out.
data = []
for _, row in df.iterrows():
    listen_meta = build_meta(row)
    if listen_meta:
        data.append(listen_meta)
# save metadata in case ListenBrainz complains to not have to repeat entering all the data.
# The with-block flushes and closes the file on exit, so no explicit calls are needed.
with open('meta.json', 'w', encoding='utf8') as f:
    json.dump(data, f)

# %%

# if using a saved file only run this:

# with open('meta.json', 'r', encoding='utf8') as f:
#     data:list = json.load(f)

auth_token = None
while not auth_token:
    # Strip so that stray whitespace from copy & paste does not end up in the token.
    auth_token = input('Enter your LB auth token (from https://listenbrainz.org/profile/): ').strip()
client = lb.ListenBrainz()
client.set_auth_token(auth_token)
# Keep ``data`` (plain dicts) untouched so this cell can be run again.
listens = [lb.Listen(**item) for item in data]

# ListenBrainz accepts at most 1000 listens per submit request
# (MAX_LISTENS_PER_REQUEST), so longer histories are sent in batches.
BATCH_SIZE = 1000
for start in range(0, len(listens), BATCH_SIZE):
    client.submit_multiple_listens(listens[start:start + BATCH_SIZE])

## Muzio2LB.md

      
    Raw
  

              Muzio2LB.md
            
          
    Muzio to ListenBrainz

Helper to import listens of a music player app to ListenBrainz.
Written for Muzio Player,
but can maybe be adapted for other players.
How did I get the music player's history?

Step 1: Backup app data

I backed up the player's data via adb (Android Debug Bridge):
adb backup com.shaiban.audioplayer.mplayer
I set a password for the backup and remembered it.
Step 2: Extract backup

I used the Android Backup Processor to transform the backup into a .tar file and extracted it.
Step 3: read data

The backup for Muzio Player contained various SQLite database files, among them the file history.db.
Exploring the file with DB Browser for SQLite revealed a table mapping song IDs to play times, stored as Unix timestamps in milliseconds.
But where are the paths to the audio files???
I could only find a mapping between song IDs and paths inside another DB file: muzio.db.
There the table playlist_song mapping songs to their playlists contained the actual paths to the song files.
This means: to import all listens, you seem to need to create a playlist in Muzio and add all songs on your device to it before backing up.
Step 4: Combine and export the data

I used the following query after opening the main database (muzio.db) and attaching the history database (history.db):
select recent_history.song_id, substr(data, 27) as path, time_played from history.recent_history
left join playlist_song on recent_history.song_id = playlist_song.song_id
The substr call removes the /storage/emulated/0/Music/ prefix from the path, which is where all my music files are located on my phone.
I simply exported the data from SQLite Studio by selecting the complete result output, pressing 'copy with headers' and pasting it into a text file.
You now have a tab-separated file of your listening history.
Step 5: Prepare for ListenBrainz and upload

This is where this python file comes into play.
It reads the saved data file and tries the following:

If the file is found, the script tries to read its metadata: artist name, track title, artist MBIDs and track MBID if available.
If the file is not found, cannot be read, or contains no artist or title tag, the user is prompted for both values, prefilled from the file name where possible.

To use it, you need Python 3.10 or newer (the script uses a match statement) with the pandas, pylistenbrainz and audio-metadata pip packages installed.
Before usage, adapt at least the code lines marked with TODO: comments.
Further adaptation may be necessary, e.g. if you use it for another music player
or if your file names follow a different format, in which case the guessing of artist names and track titles from the file name has to be adjusted.
	#%%
	import json
	import pandas as pd
	import pylistenbrainz as lb
	import audio_metadata as meta
	from os import path
	# %%
	# TODO: point this at the directory on this machine that holds the audio files
	# (the paths from the history export are appended to it as-is).
	FILES_ROOT = r"D:\Documents\\"
	# TODO: change the file name if the exported history is stored elsewhere
	raw_history = pd.read_csv(r"listenbrainz_history_to_import.tsv", dialect='excel-tab')
	print(len(raw_history))
	# Remove rows that are exact duplicates and renumber the remaining rows.
	df = raw_history.drop_duplicates(ignore_index=True)
	print(len(df))

	# %%
	def build_meta(row: pd.Series):
	    """
	    Build the ListenBrainz listen metadata for one row of the play history.

	    The audio file referenced by the row is looked up below ``FILES_ROOT`` and
	    its tags are read. Whenever that is not possible (file missing, file not
	    loadable, artist or title tag missing) the user is asked for artist and
	    title via ``guess_names``.

	    Args:
	        row: A history row providing ``song_id``, ``path`` and ``time_played``.

	    Returns:
	        A dict with ``listened_at``, ``artist_name``, ``track_name`` and
	        ``additional_info``, or ``None`` when the row has no file path.
	    """
	    pth: str = row['path']
	    if pd.isna(pth):
	        print('no file path for song id', row['song_id'])
	        return None

	    # ListenBrainz expects ``listened_at`` in Unix *seconds*, while the Muzio
	    # history stores milliseconds. A value at or above 10**11 cannot be a
	    # sensible seconds timestamp (that would be the year 5138 or later), so it
	    # is treated as milliseconds; values already in seconds pass through.
	    listened_at = row['time_played']
	    if listened_at >= 10 ** 11:
	        listened_at = int(listened_at // 1000)

	    data = {
	        'listened_at': listened_at,
	        'additional_info': {
	            'media_player': 'Muzio',
	            'submission_client': lb.__name__,
	        },
	    }

	    def ask_user(*reason):
	        # Shared fallback: report why the tags cannot be used, then prompt.
	        print(*reason)
	        data['artist_name'], data['track_name'] = guess_names(pth)
	        return data

	    full_path = FILES_ROOT + pth
	    if not path.exists(full_path):
	        # try to guess artist name / track title from filename
	        return ask_user('file not found: ', pth)

	    # try to read file metadata
	    try:
	        file = meta.load(full_path)
	    except (meta.FormatError, meta.UnsupportedFormat):
	        return ask_user('file load failed (most likely unsupported format): ', pth)

	    # The duration is recorded as soon as the file could be loaded, even if
	    # the tags turn out to be unusable afterwards.
	    data['additional_info']['duration'] = file['streaminfo'].duration
	    tags = file.tags
	    if not tags or not tags.get('artist') or not tags.get('title'):
	        return ask_user('no tags found for', pth)

	    data['artist_name'] = tags['artist'][0]
	    data['track_name'] = tags['title'][0]

	    # User-defined text frames may carry MusicBrainz identifiers.
	    custom: list[meta.ID3v2UserText] = tags.get('usertext') or []
	    for prop in custom:
	        if prop.description == 'MusicBrainz Release Track Id':
	            print('rtrack id')
	            data['additional_info']['track_mbid'] = prop.text[0]
	        elif prop.description == 'MusicBrainz Artist Id':
	            print('artist_mbid')
	            data['additional_info']['artist_mbids'] = prop.text[0].split('; ')

	    return data

	# Answers already confirmed by the user, keyed by file path, so that a file
	# that shows up several times in the history is only asked about once.
	path2data = dict()


	def _guess_from_file_name(name: str):
	    """Return an ``(artist, title)`` guess for a file name stem, or ``(None, None)``."""
	    # Format 1: "<artist> - <title>". Only the first separator splits, so the
	    # title itself may contain " - ".
	    parts = name.split(' - ', 1)
	    if len(parts) == 2 and parts[0]:
	        return parts[0], parts[1]
	    # Format 2: "<tracknum>_<artist>_<title>". Further underscores stay in the title.
	    parts = name.split('_', 2)
	    if len(parts) == 3 and parts[1]:
	        return parts[1], parts[2]
	    return None, None


	def _ask(question: str, default=None, allow_quit: bool = False) -> str:
	    """Prompt until a non-empty value is available; empty input accepts *default*."""
	    prompt = f'{question} ({default}): ' if default else f'{question}: '
	    while True:
	        answer = input(prompt)
	        if allow_quit and answer.lower() == '#q':
	            raise KeyboardInterrupt()
	        if answer or default:
	            return answer or default


	def guess_names(pth: str):
	    """
	    Prompts the user for the correct artist name and track title and tries to prefill using two formats:
	    1. <artist> - <title>.mp3
	    2. <tracknum>_<artist>_<title>.mp3

	    Pressing enter accepts the prefilled value; without a prefilled value the
	    prompt repeats until something is entered. Answers are cached per path.

	    Args:
	        pth: Path of the audio file, using ``/`` as separator.

	    Returns:
	        The ``(artist, title)`` tuple confirmed by the user.

	    Raises:
	        KeyboardInterrupt: If the user answers ``#q`` at the artist prompt.
	    """
	    cached = path2data.get(pth)
	    if cached:
	        return cached

	    # Drop the directories and the actual extension, whatever its length
	    # (".mp3", ".flac", ".opus", ...).
	    file_name = pth.rsplit('/', 1)[-1]
	    name = path.splitext(file_name)[0]

	    artist, title = _guess_from_file_name(name)
	    artist = _ask('enter artist name or #q to exit', artist, allow_quit=True)
	    title = _ask('enter title', title)

	    path2data[pth] = (artist, title)
	    return artist, title


	# %%
	# Build the listen metadata row by row; rows without a file path yield None
	# and are left out.
	data = []
	for _, row in df.iterrows():
	    listen_meta = build_meta(row)
	    if listen_meta:
	        data.append(listen_meta)
	# save metadata in case ListenBrainz complains to not have to repeat entering all the data.
	# The with-block flushes and closes the file on exit, so no explicit calls are needed.
	with open('meta.json', 'w', encoding='utf8') as f:
	    json.dump(data, f)

	# %%

	# if using a saved file only run this:

	# with open('meta.json', 'r', encoding='utf8') as f:
	# data:list = json.load(f)

	auth_token = None
	while not auth_token:
	    # Strip so that stray whitespace from copy & paste does not end up in the token.
	    auth_token = input('Enter your LB auth token (from https://listenbrainz.org/profile/): ').strip()
	client = lb.ListenBrainz()
	client.set_auth_token(auth_token)
	# Keep ``data`` (plain dicts) untouched so this cell can be run again.
	listens = [lb.Listen(**item) for item in data]

	# ListenBrainz accepts at most 1000 listens per submit request
	# (MAX_LISTENS_PER_REQUEST), so longer histories are sent in batches.
	BATCH_SIZE = 1000
	for start in range(0, len(listens), BATCH_SIZE):
	    client.submit_multiple_listens(listens[start:start + BATCH_SIZE])