Skip to content

Instantly share code, notes, and snippets.

@dnsev

dnsev/dlc.py Secret

Last active August 29, 2015 14:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save dnsev/809874d41414c883a091 to your computer and use it in GitHub Desktop.
Save dnsev/809874d41414c883a091 to your computer and use it in GitHub Desktop.
Mangatrader Downloader (Centralized)
#! /usr/bin/env python
# Usage: python dlc.py
# V3.0
# Requests a series to download and then downloads
# /mt/Status on the server
import os, re, sys, json, time, random, base64, unicodedata;
# Python 2/3 compatibility shim: resolve Request/urlopen from whichever
# urllib flavor this interpreter provides, and remember the major version.
try:
    # Python 2.x: Request and urlopen live in urllib2.
    py_v = 2
    import urllib2 as urllib
    urllib_Request = urllib.Request
    urllib_urlopen = urllib.urlopen
except ImportError:
    # Python 3.x: the same names moved under urllib.request.
    # (Was a bare `except:`, which would also swallow KeyboardInterrupt.)
    py_v = 3
    import urllib
    import urllib.request
    urllib_Request = urllib.request.Request
    urllib_urlopen = urllib.request.urlopen
# Startup banner.
for banner_line in (
    "Mangatrader Downloader (Centralized)\n",
    " CTRL+C to end at any time\n",
):
    sys.stdout.write(banner_line)
# Download
def download(id):
    """Fetch the archive for MangaTraders file *id* and return the raw response body.

    id -- numeric file identifier used in the /download/file/<id> URL.
    Returns the downloaded bytes; propagates whatever urlopen/read raises.
    """
    url = "http://www.mangatraders.com/download/file/{0:d}".format(id)
    req = urllib_Request(url, headers={
        "User-Agent": "Mozilla/5.0 (X11; U; Linux i686) Gecko/20071127 Firefox/2.0.0.11",
        "Cookie": 'PHPSESSID=3a606935c6761a1bbbf3a9e32b96df20; SMFCookie232=a%3A4%3A%7Bi%3A0%3Bs%3A6%3A%22916195%22%3Bi%3A1%3Bs%3A40%3A%220889c24c35d6a3028118aadc9cb3017c7360499b%22%3Bi%3A2%3Bi%3A1401923171%3Bi%3A3%3Bi%3A0%3B%7D; mt-2008-01-13=20705602fffd51732b598730c969f3c1; popunder=yes; popundr=yes; setover18=1'
        # In Firefox: SHIFT+F4, "prompt(1, document.cookie);", CTRL+R, copy text here
    })
    link = urllib_urlopen(req)
    try:
        # Read the whole archive into memory before returning it.
        return link.read()
    finally:
        # Was unclosed on a read() failure in the original; always release
        # the connection.
        link.close()
# Parse XML data
def parse_xml(xml_str, full_file):
    """Extract download targets from a series XML document.

    xml_str   -- XML text containing one <file id="..."> element per target.
    full_file -- source identifier (URL or path) recorded on every target.
    Returns a list of dicts with keys: id, name, series, source, server, size.
    Relies on the module-level compiled regexes (re_file, re_field_*).
    """
    def _field(regex, content):
        # Pull one child element's text; strip a <![CDATA[...]]> wrapper when
        # present. Returns None when the element is absent.
        m = regex.search(content)
        if m is None:
            return None
        value = re_normalize_field.sub("\\g<1>", m.group(1))
        # An empty substitution result means the raw text was empty or only a
        # wrapper; fall back to the raw capture (original behavior).
        return value if len(value) > 0 else m.group(1)

    targets = []
    for file_match in re_file.finditer(xml_str):
        content = file_match.group(2)
        size_match = re_field_size.search(content)
        targets.append({
            "id": int(file_match.group(1), 10),
            "name": _field(re_field_name, content),
            "series": _field(re_field_cat, content),
            "source": full_file,
            "server": _field(re_field_server, content),
            "size": label_to_bytes(size_match.group(1)) if size_match is not None else 0,
        })
    return targets
# Convert a byte label string into bytes
def label_to_bytes(text):
    """Parse a size label such as "1.5mb" into a byte count.

    text -- string containing a number followed by b/kb/mb/gb (case-insensitive).
    Returns the size in bytes, rounded to the nearest integer.
    Raises Exception("Bad format") when no size label is found.
    """
    labels = ['b', 'kb', 'mb', 'gb']
    re_label = re.compile(r'([0-9\.]+)(' + r'|'.join(labels) + ')', re.I)
    match = re_label.search(text)
    if match is None:
        # The original had an unreachable `return 0` after this raise;
        # removed as dead code.
        raise Exception("Bad format")
    value = float(match.group(1))
    # Index into labels doubles as the power-of-1024 exponent.
    exponent = labels.index(match.group(2).lower())
    return int(round(value * (1024 ** exponent)))
def bytes_to_label(size):
    """Format a byte count as a human-readable string, e.g. 1536 -> "1.5kb".

    size -- number of bytes.
    Scales to the largest unit (b/kb/mb/gb) the value fits under and rounds
    the scaled value to two decimal places.
    """
    units = ['b', 'kb', 'mb', 'gb']
    # Default to the largest unit; picked when size >= 1024**len(units).
    chosen = len(units) - 1
    for idx, _unit in enumerate(units):
        if size < 1024 ** (idx + 1):
            chosen = idx
            break
    scaled = round(size / (1024 ** chosen) * 100.0) / 100.0
    return str(scaled) + units[chosen]
def normalize_path(text):
    """Strip characters unsafe for a filename from *text*.

    Keeps alphanumerics, spaces, and common punctuation; everything else
    (slashes, colons, question marks, ...) is removed.
    """
    unsafe = r"[^a-zA-Z0-9\!\@\#\$\%\^\&\(\)\_\+\-\=\`\~\[\]\{\}\;\'\,\. ]"
    return re.sub(unsafe, "", text)
# Directories
# Directory containing this script; downloads are written under it.
script_dir = os.path.dirname(os.path.realpath(__file__));
# Regex
# One <file id="..."> element per download target in the series XML.
re_file = re.compile(r'<file id="([0-9]+)">(.*?)</file>', re.DOTALL);
# Strips a <![CDATA[...]]> wrapper, keeping the inner text.
re_normalize_field = re.compile(r'\<\!\[CDATA\[(.*?)\]\]\>', re.DOTALL);
# Child-element extractors for each target's display name, series, size, server.
re_field_name = re.compile(r'\<file_disp\>(.*?)\<\/file_disp\>', re.DOTALL);
re_field_cat = re.compile(r'\<cat_disp\>(.*?)\<\/cat_disp\>', re.DOTALL);
re_field_size = re.compile(r'\<file_size\>(.*?)\<\/file_size\>', re.DOTALL);
re_field_server = re.compile(r'\<file_server\>(.*?)\<\/file_server\>', re.DOTALL);
# Filename from a Content-Disposition header value.
re_cd_name = re.compile(r'filename="(.*)"', re.DOTALL);
# Numeric series id from an "<id>.xml" filename.
re_xml_id = re.compile(r'([0-9]+)\.xml');
# Server request
# Base64-obfuscated endpoints on a hard-coded coordination server
# (an IP address, decoded at startup): one to request a random series'
# XML, one to report a completed download.
url_request = (base64.b64decode("aHR0cDovLzE0Ni4xODUuMTM3LjE5NS9tdC9Eb3dubG9hZFJhbmRvbVNlcmllcw==")).decode("utf-8");
url_success = (base64.b64decode("aHR0cDovLzE0Ni4xODUuMTM3LjE5NS9tdC9SZXBvcnRTdWNjZXNzLw==")).decode("utf-8");
# 1-based counter of series processed this run (display only).
s_id = 1;
# When True, parse a local 0.xml instead of contacting the server.
debug = False;
# Main loop: request a random series, download each file in it, report
# success, repeat until interrupted (CTRL+C).
while (True):
    # Request
    if (debug):
        # Debug mode: read a local 0.xml instead of hitting the server.
        url = os.path.join(script_dir, "0.xml");
        actual_id = 0;
        f = open(url, "rb");
        # NOTE(review): on Python 3, str() of bytes yields "b'...'" repr text,
        # not decoded XML — the regexes may still match, but verify.
        xml = str(f.read());
        f.close();
    else:
        sys.stdout.write("Requesting series...\n");
        url = url_request;
        req = urllib_Request(url, headers={
            "User-Agent": "Mozilla/5.0 (X11; U; Linux i686) Gecko/20071127 Firefox/2.0.0.11",
        });
        link = urllib_urlopen(req);
        xml = str(link.read());
        link.close();
        # Determine the series id: prefer the Content-Disposition filename,
        # falling back to the final (post-redirect) URL's basename.
        url_final = link.geturl();
        filename = os.path.split(url_final)[1];
        if ("Content-Disposition" in link.info()):
            filename = str(link.info()["Content-Disposition"]);
            match = re_cd_name.search(filename);
            if (match is not None):
                filename = match.group(1);
        # Extract the numeric id from "<id>.xml"; 0 when absent.
        match = re_xml_id.search(filename);
        if (match is not None):
            actual_id = int(match.group(1), 10);
        else:
            actual_id = 0;
    # Get data
    series = parse_xml(xml, url);
    new_downloads = 0;
    if (len(series) > 0):
        # Targets are found now
        # Total size across all targets, for the status line.
        series_size = 0;
        for target in series:
            series_size += target["size"];
        if (len(series) > 0):
            series_name = series[0]["series"];
        else:
            series_name = "Unknown";
        sys.stdout.write("Downloading series {0:d}({1:d}) ({2:s}): {3:s}...\n".format(actual_id, s_id, bytes_to_label(series_size), series_name));
        # Start
        t_id = 1;
        for target in series:
            # Make dirs
            # Files land in <script_dir>/dls/<sanitized series name>/.
            dir = os.path.join(os.path.join(script_dir, "dls"), normalize_path(target["series"]));
            try:
                os.makedirs(dir);
            except:
                # Directory likely already exists; best-effort create.
                pass;
            # Rar or zip
            # Magic-byte signatures used to sniff the archive type.
            zip = ".zip";
            rar = ".rar";
            headers = {};
            headers[zip] = "\x50\x4b\x03\x04";
            headers[rar] = "\x52\x61\x72\x21\x1A\x07\x00";
            # Filename
            filename_base = os.path.join(dir, normalize_path(target["name"]));
            # Skip targets already present with either archive extension.
            already_downloaded = (os.path.exists(filename_base + ".zip") or os.path.exists(filename_base + ".rar"));
            status = "Downloading";
            if (already_downloaded):
                status = "Skipping";
            # Status
            sys.stdout.write("  {0:s} {1:d} of {2:d} ({3:s}): {4:s}...\n".format(status, t_id, len(series), bytes_to_label(target["size"]), target["name"]));
            # Skip
            if (not already_downloaded):
                filename_base = os.path.join(dir, normalize_path(target["name"]));
                # Download file
                dl = download(target["id"]);
                # Type
                # NOTE(review): on Python 3 dl is bytes while the signatures
                # above are str, so these comparisons are always False and
                # everything gets the ".archive" extension — confirm intent.
                if (dl[0:len(headers[rar])] == headers[rar]):
                    type = rar;
                elif (dl[0:len(headers[zip])] == headers[zip]):
                    type = zip;
                else:
                    type = ".archive";
                # Write file
                filename = filename_base + type;
                f = open(filename, "wb");
                f.write(dl);
                f.close();
                # Downloaded
                new_downloads += 1;
            # Next
            t_id += 1;
    # Next
    sys.stdout.write("Done downloading series\n\n");
    s_id += 1;
    # Okay
    # Report completion to the coordination server, but only when something
    # new was actually downloaded (and not in debug mode).
    if (new_downloads > 0 and not debug):
        url = url_success + str(actual_id);
        req = urllib_Request(url, headers={
            "User-Agent": "Mozilla/5.0 (X11; U; Linux i686) Gecko/20071127 Firefox/2.0.0.11",
        });
        link = urllib_urlopen(req);
        xml = str(link.read());
        link.close();
# Done
# NOTE(review): unreachable — the while(True) loop above never breaks;
# the script ends via CTRL+C (KeyboardInterrupt) or an unhandled error.
sys.stdout.write("Done!\n");
sys.exit(0);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment