Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save Kenneth-T-Moore/02e1004cd9aaf0f6d5b05465c955fcc8 to your computer and use it in GitHub Desktop.
Save Kenneth-T-Moore/02e1004cd9aaf0f6d5b05465c955fcc8 to your computer and use it in GitHub Desktop.
VGMDB check artist draft vs. legacy links
"""
Compares a draft to the original link and finds issues.
"""
from __future__ import print_function
from collections import OrderedDict
import re
import urllib
# Album id and draft id that are interpolated into the two URLs below.
ALBUMID = 46185
DRAFTID = 136

# The patterns are raw strings so that regex escapes such as ``\&`` are not
# run through Python's own string-escape processing (a non-raw "\&" is an
# invalid escape sequence and triggers a warning on modern interpreters).
# The compiled pattern text is unchanged.

# Captures the ``title`` attribute of the English label span of a credit row.
role_pattern = re.compile(
    r'<span class="label"><b><span title="(.*?)" class="artistname" lang="en"')

# Same as ``role_pattern`` but for labels preceded by four ``&nbsp;``
# entities; process_html() falls back to it when ``role_pattern`` finds
# nothing in a block.
subrole_pattern = re.compile(
    r'<span class="label">\&nbsp;\&nbsp;\&nbsp;\&nbsp;<b>'
    r'<span title="(.*?)" class="artistname" lang="en"')

# Captures everything between ``/artist/`` and the closing ``</a>``: the
# artist id, the remainder of the opening tag and the link content.
artist_pattern = re.compile(r'<a href="/artist/(.*?)</a>')

# Draft page and currently published page of the same album.
url_new = 'https://vgmdb.net/album/{}?draft={}'.format(ALBUMID, DRAFTID)
url_old = 'https://vgmdb.net/album/{}'.format(ALBUMID)
def _fetch_html(url):
    """Download ``url`` and return the response body as a ``str``."""
    # urlopen lives in urllib.request on Python 3 and directly in urllib on
    # Python 2; try the new location first so the script runs on both.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    sock = urlopen(url)
    try:
        html = sock.read()
    finally:
        # Release the connection even when the read fails.
        sock.close()

    # Python 3 hands back bytes, but the parsing code splits on str
    # separators.  On Python 2 the body already is a str and is left as-is.
    if not isinstance(html, str):
        # Assumes the page is UTF-8 encoded -- TODO confirm against the
        # response headers; undecodable bytes are replaced rather than fatal.
        html = html.decode('utf-8', 'replace')
    return html


def _parse_credits(html):
    """Extract ``(roles, names)`` from the first ``tbody`` section of ``html``."""
    # Text between the first and second occurrence of "tbody", with the
    # leftover tag punctuation trimmed from both ends.
    parts = html.split('tbody')
    artist_table = parts[1].lstrip('>').rstrip('</')

    # Everything before the first "maincred" marker is not a credit row.
    artist_blocks = artist_table.split('maincred')[1:]

    roles = OrderedDict()
    names = {}
    for block in artist_blocks:
        # Fall back to the &nbsp;-indented label form when the plain label
        # pattern finds nothing in this row.
        role = role_pattern.findall(block) or subrole_pattern.findall(block)

        for artist in artist_pattern.findall(block):
            # The match starts right after "/artist/", so the id is the text
            # up to the closing quote of the href attribute.
            artist_id = artist.split('"')[0]
            if 'inline' in artist:
                # Link content is wrapped in spans; take the text of the one
                # styled ``display:inline``.
                artist_name = artist.split('display:inline">')[1].split('</span')[0]
            else:
                # Plain link: the name directly follows the opening tag.
                artist_name = artist.split('>')[1]

            if artist_id not in roles:
                roles[artist_id] = []
                names[artist_id] = artist_name
            roles[artist_id].extend(role)

    return roles, names


def process_html(url):
    """Fetch an album page and collect the roles credited to each artist.

    Parameters
    ----------
    url : str
        Address of the album page to download.

    Returns
    -------
    roles : OrderedDict
        Maps artist id (str) to the list of role titles found for that
        artist, in order of first appearance on the page.
    names : dict
        Maps artist id (str) to the artist name taken from the first link
        seen for that id.

    Raises
    ------
    IndexError
        If the page contains no ``tbody`` section to parse.
    """
    return _parse_credits(_fetch_html(url))
# Scrape the published page and the draft page of the album.
old_role_dict, old_names = process_html(url_old)
new_role_dict, new_names = process_html(url_new)

# Report, for every artist on the published page, the roles found there and
# the roles found in the draft; count artists absent from the draft.
missing = 0
# .items() instead of .iteritems(): the latter exists only on Python 2.
for artistid, old_roles in old_role_dict.items():
    print(artistid, old_names[artistid])
    print(' old', old_roles)
    if artistid in new_role_dict:
        print(' new', new_role_dict[artistid])
    else:
        print(' new: MISSING')
        missing += 1
    print("")

print('Missing artists:', missing)
print('done')
"""
<tr class="maincred">
<td nowrap="nowrap">
<span class="label"><b>
<span title="Composed by" class="artistname" lang="en" style="display:inline">Composed by</span>
<span style="display:none"><em> / </em></span>
<span title="Composed by" class="artistname" lang="ja" style="display:none">Composed by</span>
</b></span>
</td>
<td width="100%">
<a href="/artist/77">
<span title="xxx" class="artistname" lang="en" style="display:inline">Nobuo Uematsu</span>
<span style="display:none"><em> / </em></span><span title="xxx" class="artistname" lang="ja" style="display:none">xxx</span>
</a>
</td>
</tr>
<tr class="maincred">
<td nowrap="nowrap">
<span class="label"><b>
<span title="Arranger" class="artistname" lang="en" style="display:inline">Arranger</span>
<span style="display:none"><em> / </em></span>
<span title="Arranger" class="artistname" lang="ja" style="display:none">Arranger</span>
</b></span>
</td>
<td width="100%">
<a href="/artist/77">
<span title="xxx" class="artistname" lang="en" style="display:inline">Nobuo Uematsu</span>
<span style="display:none"><em> / </em></span><span title="xxx" class="artistname" lang="ja" style="display:none">xxx</span>
</a>,
<a href="/artist/125">
<span title="yyy" class="artistname" lang="en" style="display:inline">Shiro Hamaguchi</span>
<span style="display:none"><em> / </em></span><span title="yyy" class="artistname" lang="ja" style="display:none">yyy</span>
</a>
</td>
</tr>
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment