Skip to content

Instantly share code, notes, and snippets.

@fxthomas
Last active November 15, 2023 19:17
  • Star 10 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save fxthomas/fd85e906e41f4e6e06f38e92a497005b to your computer and use it in GitHub Desktop.
VGMDB to MusicBrainz import script (requires Python 3.6 or newer)
#!/usr/bin/python
# coding=utf-8
"""Python Script for bootstrapping a MusicBrainz release using a VGMDB album.
This script uses the unofficial VGMDB.info JSON API to prefill the MusicBrainz
"Add Release" form with data from VGMDB.
It is only meant as a first step to make adding a new MB release easier; please
check for missing/erroneous data and make sure the imported release follows the
MusicBrainz guidelines!
Because VGMDB has a lot of Japanese content, we import track/release titles from
this language before trying other languages, and try to guess values for some
fields (e.g. "Soundtrack" album types).
Documentation about the field format is found at:
https://musicbrainz.org/doc/Development/Release_Editor_Seeding
"""
import argparse
import html
import json
import re
import sys
import webbrowser
from datetime import datetime
from pathlib import Path
from tempfile import NamedTemporaryFile
from urllib.error import URLError
from urllib.request import urlopen
def strptimes(s, fmts):
    """Parse ``s`` with the first matching format in ``fmts``.

    Args:
        s: The date string to parse.
        fmts: An iterable of ``datetime.strptime`` format strings, tried in
            order.

    Returns:
        The ``datetime`` produced by the first format that matches, or
        ``None`` when no format matches (including when ``fmts`` is empty or
        ``s`` is not a string at all, e.g. ``None``).
    """
    for fmt in fmts:
        try:
            return datetime.strptime(s, fmt)
        except (ValueError, TypeError):
            # ValueError: the string does not match this format.
            # TypeError: the input is not a string (e.g. a missing/null date);
            # treat it like "no match" instead of crashing the caller.
            continue
    return None
def vgmdb_get_album_url(album_id, format_="json"):
    """Build the VGMDB.info API address of an album.

    ``album_id`` is the numeric album identifier and ``format_`` is the value
    placed in the ``format`` query parameter ("json" unless overridden).
    """
    url_template = "https://vgmdb.info/album/%d?format=%s"
    url_fields = (album_id, format_)
    return url_template % url_fields
def vgmdb_get_album_data(album_url):
    """Retrieve and decode the JSON document served at ``album_url``.

    Args:
        album_url: URL of the album in the VGMDB API (JSON format).

    Returns:
        The decoded JSON payload.

    Raises:
        urllib.error.URLError: The request failed (``HTTPError`` for HTTP
            error statuses such as 500).
        json.JSONDecodeError: The response body is not valid JSON.
    """
    # Use the response as a context manager so the underlying connection is
    # closed even when JSON decoding fails.
    with urlopen(album_url) as response:
        return json.load(response)
def _preferred_name(names, preferred_key):
    """Return ``names[preferred_key]`` if set, else the first name ("" if none)."""
    return names.get(preferred_key) or next(iter(names.values()), "")


def _unique_names(people, preferred_key="ja"):
    """Return each person's preferred name, de-duplicated in original order."""
    unique = []
    for person in people or []:
        name = _preferred_name(person["names"], preferred_key)
        if name not in unique:
            unique.append(name)
    return unique


def _hidden_input(name, value):
    """Return a hidden ``<input>`` tag with an HTML-escaped value."""
    return f"""<input type="hidden" name="{name}" value="{html.escape(str(value))}">"""


def _parse_partial_date(date_str):
    """Return ``(year, month, day)`` with ``None`` for parts the string lacks.

    Accepts "YYYY-MM-DD", "YYYY-MM" and "YYYY"; returns ``None`` for anything
    else (including non-string input).
    """
    if not isinstance(date_str, str):
        return None
    for fmt, known_parts in (("%Y-%m-%d", 3), ("%Y-%m", 2), ("%Y", 1)):
        try:
            parsed = datetime.strptime(date_str, fmt)
        except ValueError:
            continue
        fields = (parsed.year, parsed.month, parsed.day)
        return fields[:known_parts] + (None,) * (3 - known_parts)
    return None


def _track_length_ms(length_str):
    """Convert "m:ss" / "h:mm:ss" to milliseconds; 0 when unknown or invalid."""
    if not length_str or length_str.lower() == "unknown":
        return 0
    try:
        parts = [int(part) for part in length_str.split(":")]
    except ValueError:
        return 0
    seconds = 0
    # Fold left-to-right so minutes may exceed 59 and an hours field works.
    for part in parts:
        seconds = seconds * 60 + part
    return 1000 * seconds


def write_musicbrainz_html_form(fd, album_data):
    """Write a local MusicBrainz import form containing album data.

    The form is a self-submitting HTML page that POSTs hidden fields to the
    MusicBrainz "Add Release" endpoint.

    Args:
        fd: A writable text file object that receives the HTML.
        album_data: The decoded album dict from the VGMDB API. The keys
            ``names``, ``catalog``, ``vgmdb_link`` and ``discs`` are required;
            the role lists (``composers``, ``arrangers``, ``performers``,
            ``lyricists``), ``classification``, ``release_date`` and
            ``media_format`` are optional.

    Raises:
        KeyError: A required key is missing from ``album_data``.
    """
    names = album_data["names"]
    is_japanese = "ja" in names

    fd.write("""<!doctype html>""")
    fd.write("""<meta charset="UTF-8">""")
    fd.write("""<title>Add VGMDB album As Release...</title>""")
    fd.write("""<form action="https://musicbrainz.org/release/add" method="post">""")

    # Japanese title first, then whatever other language is available.
    fd.write(_hidden_input("name", _preferred_name(names, "ja")))
    fd.write(_hidden_input("status", "official"))

    if "soundtrack" in (album_data.get("classification") or "").lower():
        fd.write(_hidden_input("type", "album"))
        fd.write(_hidden_input("type", "soundtrack"))

    if is_japanese:
        fd.write(_hidden_input("language", "jpn"))
        fd.write(_hidden_input("script", "Jpan"))

    composers = _unique_names(album_data.get("composers"))
    arrangers = _unique_names(album_data.get("arrangers"))
    performers = _unique_names(album_data.get("performers"))
    lyricists = _unique_names(album_data.get("lyricists"))

    all_artists = []
    for artist_name in composers + arrangers + performers + lyricists:
        if artist_name not in all_artists:
            all_artists.append(artist_name)

    # Heuristic kept from the original script: three or more performers are
    # credited as "Various Artists" at release level.
    artists = ["Various Artists"] if len(performers) >= 3 else all_artists
    track_artists = performers or arrangers or composers or lyricists or all_artists
    join_phrase = ", "

    for artist_ix, artist_name in enumerate(artists):
        credit = f"artist_credit.names.{artist_ix}"
        fd.write(_hidden_input(f"{credit}.artist.name", artist_name))
        if artist_ix < len(artists) - 1:
            fd.write(_hidden_input(f"{credit}.join_phrase", join_phrase))

    # Only seed the date parts VGMDB actually provides; a year-only release
    # must not be submitted as January 1st.
    release_date = _parse_partial_date(album_data.get("release_date"))
    if release_date:
        for part_name, part_value in zip(("year", "month", "day"), release_date):
            if part_value is not None:
                fd.write(_hidden_input(f"events.0.date.{part_name}", part_value))

    if is_japanese:
        fd.write(_hidden_input("events.0.country", "JP"))

    fd.write(_hidden_input("labels.0.catalog_number", album_data["catalog"]))

    vgmdb_link = album_data["vgmdb_link"]
    fd.write(_hidden_input("urls.0.url", vgmdb_link))
    fd.write(_hidden_input("urls.0.link_type", "86"))  # VGMDB
    fd.write(_hidden_input("edit_note", f"Imported from {vgmdb_link}"))

    is_cd = album_data.get("media_format") == "CD"
    last_track_artist_ix = len(track_artists) - 1

    for disc_ix, disc_data in enumerate(album_data["discs"]):
        medium = f"mediums.{disc_ix}"
        if is_cd:
            fd.write(_hidden_input(f"{medium}.format", "CD"))

        for track_ix, track_data in enumerate(disc_data["tracks"]):
            track = f"{medium}.track.{track_ix}"
            # Track names are looked up under the "Japanese" key, unlike the
            # album names which use "ja".
            track_title = _preferred_name(track_data["names"], "Japanese")
            track_length = _track_length_ms(track_data.get("track_length"))

            fd.write(_hidden_input(f"{track}.name", track_title))
            fd.write(_hidden_input(f"{track}.length", track_length))

            for artist_ix, artist_name in enumerate(track_artists):
                credit = f"{track}.artist_credit.names.{artist_ix}"
                fd.write(_hidden_input(f"{credit}.mbid", ""))
                fd.write(_hidden_input(f"{credit}.name", artist_name))
                fd.write(_hidden_input(f"{credit}.artist.name", artist_name))
                # No join phrase after the last artist, matching the
                # release-level credit above (avoids a trailing ", ").
                if artist_ix < last_track_artist_ix:
                    fd.write(_hidden_input(f"{credit}.join_phrase", join_phrase))

    fd.write("""<input type="submit" value="Add Cluster As Release...">""")
    fd.write("""</form>""")
    fd.write("""<script>document.forms[0].submit()</script>""")
# Parse arguments
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("album_id_url", help="VGMDB album id or URL", type=str)
parser.add_argument("--show-api-page", "-s", help="Show API page instead of the MB form", action="store_true")
args = parser.parse_args()
# Parse album ID
album_id = None
m = re.match(r"(https?://)?vgmdb.net/album/(?P<album_id>\d+).*", args.album_id_url)
if m:
album_id = int(m.group("album_id"))
elif args.album_id_url.isdigit():
album_id = int(args.album_id_url)
else:
print("Invalid album ID or URL")
sys.exit(1)
# Retrieve album URL
if args.show_api_page:
album_url = vgmdb_get_album_url(album_id, format_="html")
print("Opening %s" % album_url)
webbrowser.open(album_url)
sys.exit(0)
# Retrieve album data, write and open MusicBrainz form
album_url = vgmdb_get_album_url(album_id)
album_data = vgmdb_get_album_data(album_url)
with NamedTemporaryFile(suffix=".html", encoding="utf-8", mode="w+", delete=False) as fd:
write_musicbrainz_html_form(fd, album_data)
print("Opening %s" % fd.name)
webbrowser.open(fd.name)
Copy link

ghost commented Jul 10, 2021

I'm new to python and I need help on entering the arguments needed for this to work
image

@fxthomas
Copy link
Author

fxthomas commented Jul 10, 2021

@mitsufune You need to open a system command-line window, not a Python shell. I believe on Windows 10 you can use something called "Powershell", and the command-line should look like this by default?

PS> python vgmdb2mb.py https://vgmdb.net/album/... <you can press ENTER after this>

Never tested it on Windows though, let me know if something's not compatible!

@Tenome
Copy link

Tenome commented Oct 6, 2021

The script breaks if the VGMDB page only has the year, just FYI, since it expects the full date format.
https://vgmdb.net/album/20652
Thanks for the script though, it's been very useful. The other MB VGMDB script doesn't work half the time.

@fxthomas
Copy link
Author

fxthomas commented Nov 6, 2021

@Tenome Thanks, never got this kind of album before. Updated so it works with year-only dates!

(That's a totally obscure release by the way, I was curious but did not find it anywhere online!)

@Tenome
Copy link

Tenome commented Apr 3, 2022

@fxthomas Might need to be updated again? I tried this URL and it gave me an internal server error, but that might just be a problem on VGMDB's end. The other VGMDB userscript also doesn't seem to work anymore, so it could be that VGMDB updated (again). Here's the album I tried: https://vgmdb.net/album/105445

Traceback (most recent call last):
\Scripts\vgmdb2mb.py", line 183, in
album_data = vgmdb_get_album_data(album_url)
\Scripts\vgmdb2mb.py", line 48, in vgmdb_get_album_data
return json.load(urlopen(album_url))
\lib\urllib\request.py", line 222, in urlopen
return opener.open(url, data, timeout)
\lib\urllib\request.py", line 531, in open
response = meth(req, response)
\lib\urllib\request.py", line 640, in http_response
response = self.parent.error(
\lib\urllib\request.py", line 569, in error
return self._call_chain(*args)
\lib\urllib\request.py", line 502, in _call_chain
result = func(*args)
\lib\urllib\request.py", line 649, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 500: Internal Server Error

@fxthomas
Copy link
Author

The API endpoint this script uses is hosted at http://vgmdb.info, which is separate from the VGMDB website. It's sometimes offline, but it usually comes back up after a while.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment