Created
January 2, 2021 19:02
-
-
Save Kenneth-T-Moore/02e1004cd9aaf0f6d5b05465c955fcc8 to your computer and use it in GitHub Desktop.
VGMDB check artist draft vs. legacy links
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
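"""
Compare a VGMdb album draft against the published album page and report,
artist by artist, the credited roles on each side and any artist that is
missing from the draft.
"""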
from __future__ import print_function

import re
import urllib
from collections import OrderedDict
from contextlib import closing

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2
# Album and draft whose artist credits are compared.
ALBUMID = 46185
DRAFTID = 136

# The patterns are raw, single-quoted strings: the regex text is exactly what
# it was before, but no longer depends on Python passing the unknown escape
# "\ " through unchanged (a SyntaxWarning on recent Python versions).

# English label of a role whose <b> directly follows the label <span>.
role_pattern = re.compile(
    r'<span class="label"><b><span title="(.*?)" class="artistname" lang="en"'
)
# Same label, but with four escaped whitespace characters before the <b>.
# NOTE(review): the whitespace is reproduced as it appears in this file; if
# the page indents these rows with non-breaking spaces, confirm the characters
# here still match.
subrole_pattern = re.compile(
    r'<span class="label">\ \ \ \ <b><span title="(.*?)" class="artistname" lang="en"'
)
# Everything between '<a href="/artist/' and the next '</a>' (id, then markup).
artist_pattern = re.compile(r'<a href="/artist/(.*?)</a>')

url_new = 'https://vgmdb.net/album/{}?draft={}'.format(ALBUMID, DRAFTID)
url_old = 'https://vgmdb.net/album/{}'.format(ALBUMID)


def process_html(url):
    """Fetch an album page and collect the roles credited to each artist.

    Args:
        url: Address of the page to download.

    Returns:
        A ``(roles, names)`` tuple. ``roles`` is an ``OrderedDict`` mapping
        artist id (string) to the list of role titles found for that artist,
        in page order. ``names`` maps the same ids to the first display name
        seen for the artist.

    Raises:
        IndexError: If the downloaded text contains no ``tbody`` marker.
    """
    # closing() guarantees the connection is released even if read() fails,
    # and works with both the Python 2 and Python 3 response objects.
    with closing(urlopen(url)) as sock:
        html = sock.read()

    # Python 3 returns bytes; the patterns above are text. On Python 2 the
    # payload is already ``str`` and is left untouched.
    # NOTE(review): UTF-8 is assumed for the page encoding - confirm.
    if not isinstance(html, str):
        html = html.decode('utf-8', 'replace')

    # The credits live in the first <tbody>...</tbody> section of the page.
    parts = html.split('tbody')
    artist_table = parts[1].lstrip('>').rstrip('</')
    # Each credit row carries the "maincred" class; the text before the
    # first occurrence is not a row.
    artist_blocks = artist_table.split('maincred')[1:]

    roles = OrderedDict()
    names = {}
    for block in artist_blocks:
        # Fall back to the sub-role pattern only when no regular role matches.
        role = role_pattern.findall(block) or subrole_pattern.findall(block)
        for artist in artist_pattern.findall(block):
            # The capture starts with the id, terminated by the href's quote.
            artist_id = artist.split('"')[0]
            if 'inline' in artist:
                # Bilingual link: take the text of the visible (inline) span.
                artist_name = (
                    artist.split('display:inline">')[1].split('</span')[0]
                )
            else:
                # Plain link: the name follows the first '>'.
                artist_name = artist.split('>')[1]
            names.setdefault(artist_id, artist_name)
            roles.setdefault(artist_id, []).extend(role)
    return roles, names
# Download and parse both versions of the album page.
old_role_dict, old_names = process_html(url_old)
new_role_dict, new_names = process_html(url_new)

# For every artist credited on the published page, print the published roles
# next to the draft's roles and count the artists the draft no longer has.
missing = 0
# .items() instead of the Python 2-only .iteritems(): works on both versions.
for artistid, old_roles in old_role_dict.items():
    print(artistid, old_names[artistid])
    print(' old', old_roles)
    if artistid in new_role_dict:
        print(' new', new_role_dict[artistid])
    else:
        print(' new: MISSING')
        missing += 1
    print("")

print('Missing artists:', missing)
print('done')
""" | |
<tr class="maincred"> | |
<td nowrap="nowrap"> | |
<span class="label"><b> | |
<span title="Composed by" class="artistname" lang="en" style="display:inline">Composed by</span> | |
<span style="display:none"><em> / </em></span> | |
<span title="Composed by" class="artistname" lang="ja" style="display:none">Composed by</span> | |
</b></span> | |
</td> | |
<td width="100%"> | |
<a href="/artist/77"> | |
<span title="xxx" class="artistname" lang="en" style="display:inline">Nobuo Uematsu</span> | |
<span style="display:none"><em> / </em></span><span title="xxx" class="artistname" lang="ja" style="display:none">xxx</span> | |
</a> | |
</td> | |
</tr> | |
<tr class="maincred"> | |
<td nowrap="nowrap"> | |
<span class="label"><b> | |
<span title="Arranger" class="artistname" lang="en" style="display:inline">Arranger</span> | |
<span style="display:none"><em> / </em></span> | |
<span title="Arranger" class="artistname" lang="ja" style="display:none">Arranger</span> | |
</b></span> | |
</td> | |
<td width="100%"> | |
<a href="/artist/77"> | |
<span title="xxx" class="artistname" lang="en" style="display:inline">Nobuo Uematsu</span> | |
<span style="display:none"><em> / </em></span><span title="xxx" class="artistname" lang="ja" style="display:none">xxx</span> | |
</a>, | |
<a href="/artist/125"> | |
<span title="yyy" class="artistname" lang="en" style="display:inline">Shiro Hamaguchi</span> | |
<span style="display:none"><em> / </em></span><span title="yyy" class="artistname" lang="ja" style="display:none">yyy</span>
</a> | |
</td> | |
</tr> | |
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment