Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
MediaWiki Image Export
#!/usr/bin/python
import urllib2
import json
import re
import os
import sys
from pprint import pprint
from urllib import quote_plus
# Local directory that receives one sub-directory per exported image.
DOWNLOAD_DIR = "images/"

# Source wiki, plus the API query that lists its images (ailimit=10000).
BASE_URL = "http://tuxemon.referata.com"
URL = "".join((
    BASE_URL,
    "/w/api.php?action=query&list=allimages&ailimit=10000&format=json",
))

# Destination wiki and the prefix of the upload API URLs the script prints.
DEST_BASE_URL = "https://wiki.tuxemon.org"
# Empty by default; fill in before using the generated upload URLs.
# NOTE(review): the token is appended verbatim -- presumably it needs to be
# percent-encoded if it contains reserved characters; confirm before use.
EDIT_TOKEN = ""
UPLOAD_URL_PREFIX = "".join((
    DEST_BASE_URL,
    "/api.php?action=upload&token=",
    EDIT_TOKEN,
))
def wget(url, destination):
    """Download `url` and write the response body to the file `destination`.

    Both arguments are coerced with ``str()`` before use, and the request is
    made with a 5 second timeout. Nothing is returned. Exceptions raised by
    ``urllib2.urlopen``, by reading the response, or by opening/writing the
    destination file are not caught and propagate to the caller.
    """
    url = str(url)
    destination = str(destination)
    print("Saving '" + url + "' to '" + destination + "'")

    response = urllib2.urlopen(url, timeout=5)
    try:
        content = response.read()
    finally:
        # Release the connection even when reading the body fails.
        response.close()

    # Binary mode: the payload is image data, and text mode would rewrite
    # newline bytes on platforms that translate line endings.
    with open(destination, 'wb') as f:
        f.write(content)
if __name__ == "__main__":
    # Export driver. For every image listed by the source wiki it collects
    # the current file URL, the archived revisions linked from the image's
    # description page and the upload comment, then prints the upload API
    # URLs for the destination wiki (the actual downloads are commented out).
    if not os.path.exists(DOWNLOAD_DIR):
        os.makedirs(DOWNLOAD_DIR)

    listing_json = urllib2.urlopen(URL).read()
    """Example data:
    {
        "query": {
            "allimages": [
                {
                    "name": "Abesnaki-front-back.png",
                    "title": "File:Abesnaki-front-back.png",
                    "url": "http://tuxemon.referata.com/w/images/Abesnaki-front-back.png",
                    "timestamp": "2016-02-04T16:31:58Z",
                    "descriptionurl": "http://tuxemon.referata.com/wiki/File:Abesnaki-front-back.png",
                    "ns": 6
                },
                {
                    "name": "Axylightl-back.png",
                    "title": "File:Axylightl-back.png",
                    "url": "http://tuxemon.referata.com/w/images/Axylightl-back.png",
                    "timestamp": "2016-02-04T16:08:56Z",
                    "descriptionurl": "http://tuxemon.referata.com/wiki/File:Axylightl-back.png",
                    "ns": 6
                },
    """
    data = json.loads(listing_json)

    images = []
    for image in data["query"]["allimages"]:
        img = {
            "history": [],
            "name": image["name"],
            "current": image["url"],
            "url": image["descriptionurl"],
        }
        page_html = urllib2.urlopen(img["url"]).read()

        # Look for "/w/images/archive" to see if this image has a history.
        # The name is escaped so that characters such as "." or "(" in a
        # file name are matched literally instead of being read as regex
        # syntax.
        pattern = r"/w/images/archive/\d+%21" + re.escape(image["name"])
        archive_paths = set(re.findall(pattern, page_html))
        img["history"] = sorted(BASE_URL + path for path in archive_paths)

        # Look up this image's comment. The title is encoded to UTF-8 first
        # because Python 2's quote_plus raises KeyError on non-ASCII unicode.
        comment_lookup_url = (
            BASE_URL
            + "/w/api.php?action=query&titles="
            + quote_plus(image["title"].encode("utf-8"))
            + "&prop=imageinfo&iiprop=comment&format=json"
        )
        print("Looking up comment: " + comment_lookup_url)
        comment_json = urllib2.urlopen(comment_lookup_url).read()
        """Example image_comment data:
        {
            "query": {
                "pages": {
                    "184": {
                        "imagerepository": "local",
                        "ns": 6,
                        "pageid": 184,
                        "imageinfo": [
                            {
                                "comment": "By mysterynoodler\n\n[[Category:Abesnaki]]\n[[Category:mysterynoodler]]\n[[Category:Nonstandard px]]\n[[Category:Alternative]]\n[[Category:Front Sprite]]\n[[Category:Back Sprite]]"
                            }
                        ],
                        "title": "File:Abesnaki-front-back.png"
                    }
                }
            }
        }
        """
        print(comment_json)
        image_comment_data = json.loads(comment_json)
        # "pages" is keyed by page id; only the page records are needed.
        for page in image_comment_data["query"]["pages"].values():
            img["comment"] = page["imageinfo"][0]["comment"]

        images.append(img)
        pprint(img)
        # NOTE(review): this exit looks like a debugging leftover, and its
        # original nesting level cannot be recovered from the flattened
        # source. As placed here the script stops after the first image; at
        # either level the loop below never runs while this call is present.
        sys.exit()

    """Example img:
    {'current': u'http://tuxemon.referata.com/w/images/Lambert_Evo_Take_II_first_attempt.png',
     'history': ['http://tuxemon.referata.com/w/images/archive/20160215094756%21Lambert_Evo_Take_II_first_attempt.png',
                 'http://tuxemon.referata.com/w/images/archive/20170215094756%21Lambert_Evo_Take_II_first_attempt.png'],
     'name': u'Lambert_Evo_Take_II_first_attempt.png',
     'url': u'http://tuxemon.referata.com/wiki/File:Lambert_Evo_Take_II_first_attempt.png'}
    """
    for img in images:
        # Create a directory to store our image history
        image_download_dir = os.path.join(DOWNLOAD_DIR, img["name"])
        if not os.path.exists(image_download_dir):
            os.makedirs(image_download_dir)

        # Create a directory for our current image.
        current_dir = os.path.join(image_download_dir, "current")
        if not os.path.exists(current_dir):
            os.makedirs(current_dir)

        # Query-string values are percent-encoded so that "&", "#", "+" or
        # spaces in a file name or URL cannot corrupt the upload URL.
        filename_param = quote_plus(img["name"].encode("utf-8"))

        # Download our current image.
        #filename=Image.jpg&url=http://www.google.com/intl/en_ALL/images/logo.gif&token="
        print(UPLOAD_URL_PREFIX + "&filename=" + filename_param
              + "&url=" + quote_plus(img["current"].encode("utf-8")))
        #wget(img["current"], image_download_dir + "/current/" + img["name"])

        # Loop through our file history and create directories for each one.
        for item in img["history"]:
            # The archive timestamp sits between the archive prefix and "%21".
            timestamp = item.split(BASE_URL + "/w/images/archive/")[1].split("%21")[0]
            history_dir = os.path.join(image_download_dir, timestamp)
            if not os.path.exists(history_dir):
                os.makedirs(history_dir)
            print(UPLOAD_URL_PREFIX + "&filename=" + filename_param
                  + "&url=" + quote_plus(item))
            #wget(item, history_dir + "/" + img["name"])

    # Get file history of each image and download that image.
    # Example: http://tuxemon.referata.com/wiki/File:Lambert_Evo_Take_II_first_attempt.png
    # Example archive: http://tuxemon.referata.com/w/images/archive/20160215094756%21Lambert_Evo_Take_II_first_attempt.png
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.