Created
January 20, 2016 00:16
-
-
Save yutsuku/8f8751e63746b891fd55 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# This script takes your anime-planet.com username, scrapes your anime list and writes it
# as UTF-8 XML to anime-planet.xml (titles that could not be matched on MAL go to anime-planet.txt)
# | |
# Original author: unknown | |
# Maintenance: moh'at'yutsuku.net | |
# | |
# Requirements: MAL account (for checking data) | |
# Additional info and packages: | |
# Python 3.* - http://python.org/download/ | |
# BeautifulSoup - http://www.crummy.com/software/BeautifulSoup/#Download | |
from bs4 import BeautifulSoup,NavigableString | |
import urllib.request, urllib.error, urllib.parse, sys, re, codecs, base64 | |
# MyAnimeList credentials; edit these before running the script.
MAL_username = "CHANGE_THIS"
MAL_password = "CHANGE_THIS"

# Base URL of the MAL anime search endpoint (the search term is appended).
queryURL = "http://myanimelist.net/api/anime/search.xml?q="

# HTTP Basic credentials: base64 of "username:password".
# base64.b64encode is used instead of base64.encodestring because the latter
# was removed in Python 3.9 and also inserted a newline every 76 output
# characters, which would corrupt the header for long credentials.
encodedstring = base64.b64encode(
    "{}:{}".format(MAL_username, MAL_password).encode("ascii")
)
auth = "Basic {}".format(encodedstring.decode("utf-8"))

# Headers sent with every MAL API request.
headers = {
    "Authorization": auth,
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0",
}
def MAL_Verify():
    """Check the configured MAL credentials against the verify endpoint.

    Sends a request carrying the module-level ``headers`` to
    ``verify_credentials.xml``.

    Returns:
        200 when the request succeeds, otherwise the HTTP status code
        carried by the ``HTTPError`` (for example 401).

    Raises:
        Any non-HTTP failure from ``urlopen`` (e.g. ``URLError``) propagates
        to the caller unchanged.
    """
    verifyURL = "http://myanimelist.net/api/account/verify_credentials.xml"
    req = urllib.request.Request(verifyURL, None, headers)
    try:
        # The body is not needed; the context manager just makes sure the
        # connection is closed instead of being leaked.
        with urllib.request.urlopen(req):
            return 200
    except urllib.error.HTTPError as e:
        return e.code
def MAL_GetEntryID(title):
    """Search the MAL API for *title* and return the ID of the first hit.

    Args:
        title: Anime title to search for; it is URL-quoted before being
            appended to the search URL.

    Returns:
        The text of the first ``<id>`` element found inside the first
        ``<anime>`` element of the response, or ``None`` when the API
        answers 204, answers with an unexpected status, or the response
        contains no such element.

    Raises:
        Any exception raised while requesting or parsing is re-raised after
        a short diagnostic has been printed.
    """
    search_url = "http://myanimelist.net/api/anime/search.xml?q="
    req = urllib.request.Request(search_url + urllib.parse.quote_plus(title), None, headers)
    try:
        # The context manager closes the connection on every path.
        with urllib.request.urlopen(req) as handle:
            status = handle.getcode()
            if status == 200:
                # Name the parser explicitly so the result does not depend on
                # which optional parsers happen to be installed.
                xml = BeautifulSoup(handle.read(), "html.parser")
                anime = xml.find("anime")
                entry_id = anime.find("id") if anime is not None else None
                # A 200 response without a usable entry counts as "not found"
                # instead of crashing with AttributeError.
                return entry_id.text if entry_id is not None else None
            if status == 204:
                return None
            # getcode() returns an int; convert it before concatenating.
            print("[MAL_GetEntryID] Unknown reponse from MAL API: " + str(status))
            return None
    except Exception:
        # Catch Exception rather than everything so Ctrl-C is not reported
        # as an unexpected error.
        print("[Error] Something went HORRIBLY wrong.")
        print("Unexpected error:", sys.exc_info()[0])
        raise
# Verify the MAL credentials up front; the export needs MAL to resolve IDs.
MAL_Status = MAL_Verify()
if MAL_Status == 401:
    print("[Error] Wrong username or password for MAL")
    # sys.exit instead of the interactive-only exit() helper; a non-zero
    # status signals the failure to the calling shell.
    sys.exit(1)
elif MAL_Status == 200:
    print("[MAL] Account Verification Ok")
else:
    # MAL_Status is an int here; convert it before concatenating, otherwise
    # the error report itself fails with TypeError.
    print("[Error] MAL returned unknown reponse: " + str(MAL_Status))
    sys.exit(1)
# ---------------------------------------------------------------------------
# Main export: walk every page of the user's anime-planet list, resolve each
# title to a MAL ID and write a MyAnimeList-style XML import file. Titles
# that MAL_GetEntryID could not resolve are listed in a plain-text file.
# ---------------------------------------------------------------------------

# Browser-like User-Agent sent with every anime-planet request.
_AP_HEADERS = {'User-Agent': ' Mozilla/5.0 (Windows NT 6.1; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0'}

# One <anime> element of the output file; the placeholders are filled per row.
_ANIME_TEMPLATE = (
    '\t<anime>\n'
    '\t\t<series_animedb_id>{mal_id}</series_animedb_id>\n'
    '\t\t<series_title><![CDATA[{title}]]></series_title>\n'
    '\t\t<series_type>{series_type}</series_type>\n'
    '\t\t<series_episodes></series_episodes>\n'
    '\t\t<my_id></my_id>\n'
    '\t\t<my_watched_episodes>{episodes}</my_watched_episodes>\n'
    '\t\t<my_start_date>0000-00-00</my_start_date>\n'
    '\t\t<my_finish_date>0000-00-00</my_finish_date>\n'
    '\t\t<my_rated></my_rated>\n'
    '\t\t<my_score>{score}</my_score>\n'
    '\t\t<my_dvd></my_dvd>\n'
    '\t\t<my_storage></my_storage>\n'
    '\t\t<my_status>{status}</my_status>\n'
    '\t\t<my_comments><![CDATA[]]></my_comments>\n'
    '\t\t<my_times_watched>0</my_times_watched>\n'
    '\t\t<my_rewatch_value>0</my_rewatch_value>\n'
    '\t\t<my_downloaded_eps>0</my_downloaded_eps>\n'
    '\t\t<my_tags><![CDATA[]]></my_tags>\n'
    '\t\t<my_rewatching></my_rewatching>\n'
    '\t\t<my_rewatching_ep>0</my_rewatching_ep>\n'
    '\t\t<update_on_import>1</update_on_import>\n'
    '\t</anime>\n\n'
)


def _fetch_page(url):
    """Download *url* with a plain GET and return it parsed by BeautifulSoup."""
    # No ``data`` argument: passing one would turn the request into a POST.
    req = urllib.request.Request(url=url, headers=_AP_HEADERS)
    with urllib.request.urlopen(req) as response:
        return BeautifulSoup(response.read(), 'html.parser')


def _cdata_safe(text):
    """Return *text* with any ``]]>`` split so it cannot end a CDATA section."""
    return text.replace(']]>', ']]]]><![CDATA[>')


print ('This script will export your anime-planet.com anime list and saves it to anime-planet.xml')
username = input("Enter your username: ")
# Quote the name so characters such as spaces cannot produce an invalid URL.
baseURL = 'http://www.anime-planet.com/users/' + urllib.parse.quote(username) + '/anime'

# The pager entry just before the "next" item holds the last page number.
pager_next = _fetch_page(baseURL).find('li', 'next')
if pager_next is None:
    # No pager found - presumably the whole list fits on one page.
    pageNumber = 1
else:
    pageNumber = int(pager_next.find_previous('li').next_element.contents[0])

# newline='' disables newline translation so the files contain exactly the
# '\n' characters written below on every platform.
with open('anime-planet.xml', 'w', encoding='utf-8', newline='') as f:
    f.write('<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n')
    f.write('<myanimelist>\n')
    with open('anime-planet.txt', 'w', encoding='utf-8', newline='') as fmanual:
        fmanual.write('List of titles that could not be exported\n\n')
        print ('Exporting rough variant of myanimelist format...')
        for page in range(1, pageNumber + 1):
            listing = _fetch_page(baseURL + '?page=' + str(page))
            # The first <tr> is skipped (the original treats it as a header row).
            for animeItem in listing.find_all('tr')[1:]:
                title = animeItem.a.text
                MAL_ID = MAL_GetEntryID(title)
                # Translate anime-planet status labels into MAL's wording.
                anime_status = (
                    animeItem.find('td', 'tableStatus').text
                    .replace('status box', '')
                    .strip(' \t\n\r')
                    .replace("Watched", "Completed")
                    .replace("Stalled", "On-Hold")
                    .replace("Want to Watch", "Plan to Watch")
                )
                # The rating is doubled to produce the score out of 10.
                # NOTE(review): [0] keeps only the first character of the
                # attribute, so a value like "4.5" would become 8, not 9 -
                # behaviour kept as-is; confirm against the page markup.
                anime_rating = str(int(float(animeItem.find('div', 'starrating').div.attrs['name'][0]) * 2))
                if MAL_ID:
                    f.write(_ANIME_TEMPLATE.format(
                        mal_id=MAL_ID,
                        title=_cdata_safe(title),
                        series_type=animeItem.find('td', 'tableType').text,
                        # NOTE(review): replaces blanks with '1'; the original
                        # character may have been a non-breaking space - verify.
                        episodes=animeItem.find('td', 'tableEps').text.replace(' ', '1'),
                        score=anime_rating,
                        status=anime_status,
                    ))
                elif anime_rating == "0":
                    # Unresolved titles go to the text file; the score is
                    # omitted when the computed score is "0".
                    fmanual.write("[" + anime_status + "] " + title + '\n')
                else:
                    fmanual.write("[" + anime_status + "][" + anime_rating + "/10] " + title + '\n')
    f.write('</myanimelist>\n')
print('Done, see anime-planet.xml and anime-planet.txt')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment