Skip to content

Instantly share code, notes, and snippets.

@kartikeyaSh
Created April 4, 2018 19:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kartikeyaSh/485982932c8be14ef4e066a35d3d8b70 to your computer and use it in GitHub Desktop.
Save kartikeyaSh/485982932c8be14ef4e066a35d3d8b70 to your computer and use it in GitHub Desktop.
# Script to fetch artist MBIDs from MusicBrainz Database using
# the recording MBIDs in the MessyBrainz database.
import brainzutils.musicbrainz_db.recording as mb_recording
import json
import re
import default_config as config
try:
import custom_config as config
except ImportError:
pass
from brainzutils import musicbrainz_db
from brainzutils.musicbrainz_db.exceptions import NoDataFoundException
import db
from sqlalchemy import text
import brainzutils.cache
# Initialise the brainzutils cache at import time, using the Redis host, port
# and namespace taken from whichever config module was imported above.
brainzutils.cache.init(host=config.REDIS_HOST, port=config.REDIS_PORT, namespace=config.REDIS_NAMESPACE)
# Canonical lowercase, hyphenated 8-4-4-4-12 hexadecimal UUID form.
# ``\Z`` anchors at the very end of the string; ``$`` would also match just
# before a trailing newline and let values such as "<uuid>\n" through.
_UUID_PATTERN = re.compile(
    r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\Z'
)


def check_valid_uuid(s):
    """Return True if ``s`` is a lowercase, hyphenated UUID string.

    Args:
        s: Candidate value, normally a string read from the database.

    Returns:
        True when the whole of ``s`` is a lowercase hexadecimal UUID in
        8-4-4-4-12 form, False otherwise. Non-string input (for example
        ``None``) is reported as invalid instead of raising.
    """
    try:
        return _UUID_PATTERN.match(s) is not None
    except TypeError:
        # ``s`` is not a text string, so it cannot be a valid UUID.
        return False
def add_artist_mbids(recording_mbid, artist_mbids):
    """Store the artist MBIDs for a recording in ``recording_artist``.

    Args:
        recording_mbid: MBID of the recording the artists belong to.
        artist_mbids: JSON-serialisable collection of artist MBIDs; it is
            stored as a JSON-encoded string.

    Returns:
        The ``recording_mbid`` returned by the INSERT, or None when the
        statement reports no affected rows.
    """
    serialized_artists = json.dumps(artist_mbids)
    with db.engine.begin() as connection:
        insert_stmt = text(
            "INSERT INTO recording_artist (recording_mbid, artist_mbids)\n"
            "VALUES (:recording_mbid, :artist_mbids)\n"
            "RETURNING recording_mbid"
        )
        params = {
            "recording_mbid": recording_mbid,
            "artist_mbids": serialized_artists,
        }
        inserted = connection.execute(insert_stmt, params)
        if not inserted.rowcount:
            return None
        return inserted.fetchone()["recording_mbid"]
def is_recording_mbid_present(recording_mbid):
    """Check whether a recording MBID already has a row in ``recording_artist``.

    Args:
        recording_mbid: MBID of the recording to look up.

    Returns:
        True if at least one matching row exists, False otherwise.
    """
    with db.engine.begin() as connection:
        query = text(
            "SELECT recording_mbid FROM recording_artist\n"
            "WHERE recording_mbid = :recording_mbid"
        )
        result = connection.execute(query, {
            "recording_mbid": recording_mbid,
        })
        # ``rowcount`` is only dependable for UPDATE/DELETE and is
        # driver-specific for SELECT, so test for an actual row instead.
        return result.fetchone() is not None
def fetch_artist_mbids(recording_mbid):
    """Look up a recording's artists in MusicBrainz and store their MBIDs.

    Args:
        recording_mbid: MBID of the recording to process.

    Returns:
        True if the recording is already present in the table or its artist
        MBIDs were inserted; False if MusicBrainz has no data for the
        recording or the insert returned nothing.
    """
    # Nothing to do when the recording has been handled before.
    if is_recording_mbid_present(recording_mbid):
        return True

    try:
        recording = mb_recording.get_recording_by_mbid(recording_mbid, include=['artists'])
    except NoDataFoundException:
        return False

    artist_ids = [artist['id'] for artist in recording['artists']]
    inserted_mbid = add_artist_mbids(recording_mbid, artist_ids)
    return bool(inserted_mbid)
def fetch_artist_mbids_for_all_recording_mbids():
    """Fetch and store artist MBIDs for every recording MBID in ``recording_json``.

    Initialises both database engines from the config, selects the distinct
    non-null ``recording_mbid`` values from ``recording_json``, prints each
    one, and calls ``fetch_artist_mbids`` for those that are valid UUIDs.
    """
    db.init_db_engine(config.SQLALCHEMY_DATABASE_URI)
    musicbrainz_db.init_db_engine(config.MB_DATABASE_URI)

    # Get a list of all distinct recording MBIDs from recording_json table
    with db.engine.begin() as connection:
        select_stmt = text(
            "SELECT DISTINCT data ->> 'recording_mbid' AS recording_mbid\n"
            "FROM recording_json\n"
            "WHERE data ->> 'recording_mbid' IS NOT NULL"
        )
        rows = connection.execute(select_stmt)
        for row in rows:
            print(row[0])
            # Skip values that are not well-formed UUIDs.
            if not check_valid_uuid(row[0]):
                continue
            fetch_artist_mbids(row[0])
# Script entry point: run the full fetch as soon as this module is executed.
fetch_artist_mbids_for_all_recording_mbids()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment