Skip to content

Instantly share code, notes, and snippets.

@yutsuku
Created January 20, 2016 00:16
Show Gist options
  • Save yutsuku/8f8751e63746b891fd55 to your computer and use it in GitHub Desktop.
Save yutsuku/8f8751e63746b891fd55 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# This script will take your anime-planet.com username and scrape a list of your watched anime in
# XML utf-8 format to anime-planet.xml (and anime-planet.txt in case of problems)
#
# Original author: unknown
# Maintenance: moh'at'yutsuku.net
#
# Requirements: MAL account (for checking data)
# Additional info and packages:
# Python 3.* - http://python.org/download/
# BeautifulSoup - http://www.crummy.com/software/BeautifulSoup/#Download
from bs4 import BeautifulSoup,NavigableString
import urllib.request, urllib.error, urllib.parse, sys, re, codecs, base64
# MAL API credentials — replace both before running.
MAL_username = "CHANGE_THIS"
MAL_password = "CHANGE_THIS"

# Base endpoint for MAL's anime title search API.
queryURL = "http://myanimelist.net/api/anime/search.xml?q="

# Build the HTTP Basic auth header value.
# BUG FIX: base64.encodestring() was deprecated and removed in Python 3.9.
# b64encode() yields the same digest for short inputs and adds no trailing
# newline, so the original [:-1] strip is no longer needed.
encodedstring = base64.b64encode("{}:{}".format(MAL_username, MAL_password).encode("ascii"))
auth = "Basic {}".format(encodedstring.decode("utf-8"))

# Sent with every MAL API request; the User-Agent mimics a desktop browser
# so the endpoint does not reject the script.
headers = {
    "Authorization": auth,
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0"
}
def MAL_Verify():
    """Check the configured MAL credentials against the verify endpoint.

    Returns 200 when the account verifies, 401 on bad credentials, or
    whatever other HTTP status code the API responded with.
    """
    verifyURL = "http://myanimelist.net/api/account/verify_credentials.xml"
    req = urllib.request.Request(verifyURL, None, headers)
    try:
        urllib.request.urlopen(req)
        return 200
    except urllib.error.HTTPError as e:
        # BUG FIX: HTTPError lives in urllib.error, not urllib.request;
        # the original except clause raised AttributeError instead of
        # catching the HTTP failure.  The 401-vs-other branch was
        # redundant — both arms just return e.code.
        return e.code
def MAL_GetEntryID(title):
    """Look up *title* on MAL's search API and return the first hit's id.

    Returns the id as a string, or None when MAL has no match (HTTP 204)
    or responds with an unexpected status.  Unexpected exceptions are
    logged and re-raised.
    """
    queryURL = "http://myanimelist.net/api/anime/search.xml?q="
    req = urllib.request.Request(queryURL + urllib.parse.quote_plus(title), None, headers)
    try:
        handle = urllib.request.urlopen(req)
        if handle.getcode() == 200:
            xml = BeautifulSoup(handle.read())
            # First <anime> entry in the response is taken as the match.
            return xml.anime.id.text
        elif handle.getcode() == 204:
            # 204 No Content: MAL knows no anime by this title.
            return None
        else:
            # BUG FIX: the original concatenated str + int here, raising
            # TypeError on the very branch meant to report the problem.
            print("[MAL_GetEntryID] Unknown reponse from MAL API: {}".format(handle.getcode()))
            return None
    except:
        # Bare except is kept so even KeyboardInterrupt is logged, but the
        # exception is always re-raised — nothing is swallowed.
        print("[Error] Something went HORRIBLY wrong.")
        print("Unexpected error:", sys.exc_info()[0])
        raise
# Verify the MAL account before doing any scraping work.
MAL_Status = MAL_Verify()
if MAL_Status == 401:
    print("[Error] Wrong username or password for MAL")
    exit()
elif MAL_Status == 200:
    print("[MAL] Account Verification Ok")
else:
    # BUG FIX: MAL_Status is an int; the original str + int concatenation
    # raised TypeError on this error path instead of printing the code.
    print("[Error] MAL returned unknown reponse: {}".format(MAL_Status))
    exit()
print('This script will export your anime-planet.com anime list and saves it to anime-planet.xml')
username = input("Enter your username: ")
baseURL = 'http://www.anime-planet.com/users/' + username + '/anime'

# BUG FIX: the original passed data=b'None', which silently turned the
# request into a POST carrying the literal body "None".  data=None keeps
# it a plain GET, which is what fetching a profile page should be.
req = urllib.request.Request(url=baseURL, data=None, headers={'User-Agent': ' Mozilla/5.0 (Windows NT 6.1; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0'})
html = urllib.request.urlopen(req).read()
html = BeautifulSoup(html)

# The pagination widget's last numbered <li> (the one just before the
# "next" link) holds the total page count.
pageNumber = int(html.find('li', 'next').findPrevious('li').next.contents[0])

delimiter = '\t'

# Output files: the MAL XML import file, plus a plain-text fallback list
# for titles that could not be matched automatically.
f = codecs.open('anime-planet.xml', 'w', 'utf-8')
f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
f.write('<myanimelist>\n')
fmanual = codecs.open('anime-planet.txt', 'w', 'utf-8')
fmanual.write('List of titles that could not be exported\n\n')
print('Exporting rough variant of myanimelist format...')
# Walk every page of the user's anime list and emit one <anime> element
# per matched title; unmatched titles go to the plain-text fallback file.
for i in range(1, pageNumber + 1):
    baseURL = 'http://www.anime-planet.com/users/' + username + '/anime?page=' + str(i)
    # BUG FIX: data=b'None' made this a POST with the literal body "None";
    # data=None issues the intended GET.
    req = urllib.request.Request(url=baseURL, data=None, headers={'User-Agent': ' Mozilla/5.0 (Windows NT 6.1; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0'})
    html = urllib.request.urlopen(req).read()
    html = BeautifulSoup(html)
    # Skip the header row; every remaining <tr> is one anime entry.
    for animeItem in html.findAll('tr')[1:]:
        animeItem = BeautifulSoup(animeItem.renderContents())
        MAL_ID = MAL_GetEntryID(animeItem.a.text)
        # Map anime-planet status labels onto MAL's vocabulary.
        anime_status = animeItem.find('td', 'tableStatus').text.replace('status box', '').strip(' \t\n\r').replace("Watched", "Completed").replace("Stalled", "On-Hold").replace("Want to Watch", "Plan to Watch")
        # anime-planet rates 0-5 stars (first character of the star-widget
        # name attribute); MAL scores run 0-10, hence the doubling.
        anime_rating = str(int(float(animeItem.find('div', 'starrating').div.attrs['name'][0]) * 2))
        if MAL_ID:
            f.write('\t<anime>\n')
            f.write('\t\t<series_animedb_id>' + MAL_ID + '</series_animedb_id>\n')
            f.write('\t\t<series_title><![CDATA[' + animeItem.a.text + ']]></series_title>\n')
            f.write('\t\t<series_type>' + animeItem.find('td', 'tableType').text + '</series_type>\n')
            f.write('\t\t<series_episodes></series_episodes>\n')
            f.write('\t\t<my_id></my_id>\n')
            # An &nbsp; episode count means "seen at least one"; export 1.
            f.write('\t\t<my_watched_episodes>' + animeItem.find('td', 'tableEps').text.replace('&nbsp;', '1') + '</my_watched_episodes>\n')
            f.write('\t\t<my_start_date>0000-00-00</my_start_date>\n')
            f.write('\t\t<my_finish_date>0000-00-00</my_finish_date>\n')
            f.write('\t\t<my_rated></my_rated>\n')
            f.write('\t\t<my_score>' + anime_rating + '</my_score>\n')
            f.write('\t\t<my_dvd></my_dvd>\n')
            f.write('\t\t<my_storage></my_storage>\n')
            f.write('\t\t<my_status>' + anime_status + '</my_status>\n')
            f.write('\t\t<my_comments><![CDATA[]]></my_comments>\n')
            f.write('\t\t<my_times_watched>0</my_times_watched>\n')
            f.write('\t\t<my_rewatch_value>0</my_rewatch_value>\n')
            f.write('\t\t<my_downloaded_eps>0</my_downloaded_eps>\n')
            f.write('\t\t<my_tags><![CDATA[]]></my_tags>\n')
            f.write('\t\t<my_rewatching></my_rewatching>\n')
            f.write('\t\t<my_rewatching_ep>0</my_rewatching_ep>\n')
            f.write('\t\t<update_on_import>1</update_on_import>\n')
            f.write('\t</anime>\n\n')
        else:
            # No MAL match: record the title (with rating when present)
            # for manual handling.
            if anime_rating == "0":
                fmanual.write("[" + anime_status + "] " + animeItem.a.text + '\n')
            else:
                fmanual.write("[" + anime_status + "][" + anime_rating + "/10] " + animeItem.a.text + '\n')
f.write('</myanimelist>\n')
# BUG FIX: close both output files so buffered data is flushed to disk;
# the original script exited without ever closing them.
f.close()
fmanual.close()
print('Done, see anime-planet.xml and anime-planet.txt')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment