# razasyedh/decode.py

## decode.py
#!/usr/bin/env python

"""decode.py

A Python tool that decodes the obfuscated URLs on madokami pages and
outputs them to a file.

Dependencies: requests, beautifulsoup4

Usage: decode.py URL outputfile.txt

Note: This is no longer necessary/functional because madokami doesn't
obfuscate its URLs anymore.
"""

import sys
import requests
from bs4 import BeautifulSoup
from urlparse import urlparse
import warnings

def validate_url(url):
    """Make sure the URL is from the right website and subdomain.

    Only the host part of the URL is checked. It is compared
    case-insensitively and without any port or credentials, so
    "https://MANGA.madokami.com:443/..." is accepted as well.

    Args:
        url: The URL given by the user.

    Returns:
        None. If the host is not manga.madokami.com an error is printed and
        the program exits with status 1.
    """
    network_location = "manga.madokami.com"
    # .hostname is lower-cased and has port/userinfo stripped; it is None
    # when the input has no network location at all (e.g. no scheme).
    hostname = urlparse(url).hostname

    if hostname != network_location:
        print("Error: Input is not a madokami link.")
        sys.exit(1)

def get_webpage(url):
    """Get the contents of the page and return a BeautifulSoup object of the
    page.

    The page is requested with a Firefox user-agent header, with the
    module-level USERNAME and PASSWORD passed as the request's auth pair, and
    with SSL certificate verification disabled.

    Args:
        url: The URL of the page to download.

    Returns:
        The parsed page as a BeautifulSoup object.

    Prints an error and exits with status 1 when the server answers 401, when
    it answers with any other non-OK status, or when the returned page is the
    site's front page.
    """
    agent = "Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0"
    http_headers = {"user-agent": agent}

    # catch_warnings() puts the previous warning filters back on every exit
    # path, instead of wiping them all the way resetwarnings() would.
    with warnings.catch_warnings():
        # Ignore urllib3's InsecureRequestWarning
        warnings.simplefilter("ignore")

        print("Getting Webpage...")
        # We are making an unverified SSL connection because madokami's SSL
        # certificate only seems to match madokami.com
        page = requests.get(url, verify=False, auth=(USERNAME, PASSWORD),
                            headers=http_headers)

        return_code = page.status_code
        if return_code == 401: # unauthorized
            print("Error: Please make sure your credentials are correct.")
            sys.exit(1)
        if not page.ok:
            print("Error: The server returned a %d status code." % return_code)
            sys.exit(1)

        print("Parsing Webpage...")
        souped_page = BeautifulSoup(page.content)

    # Make sure madokami didn't just return the front page. A page without a
    # <title> has no .title, so guard against None before reading .string.
    title = souped_page.title
    if title is not None and title.string == "/ - /a/ manga":
        print("Error: Please make sure your URL points to a proper series.")
        sys.exit(1)

    return souped_page

def string_to_list(string):
    """Takes a string of comma-delimited numbers and converts them to a list
    of ints.

    Args:
        string: Text such as "1,2,3". An empty or whitespace-only string
            stands for "no numbers".

    Returns:
        A list of ints, empty when the input holds no numbers.

    Raises:
        ValueError: If any comma-separated item is not a valid integer.
    """
    # "".split(',') yields [''], which int() rejects, so handle it up front.
    if not string.strip():
        return []

    return [int(item) for item in string.split(',')]

def get_table(webpage):
    """Find the table in the page source and return the table as a list.

    Args:
        webpage: The parsed page (BeautifulSoup object) to search.

    Returns:
        The lookup table as a list of ints.

    Prints an error and exits with status 1 when the page has no
    "index-container" div or the div has no data-table attribute.
    """
    # Expects: <div class="index-container" data-table="[int,int ... ,int]">
    container = webpage.find("div", {"class": "index-container"})

    # find() returns None when nothing matches; report that clearly instead
    # of failing on the attribute lookup below.
    if container is None or container.get("data-table") is None:
        print("Error: Could not find the lookup table on the page.")
        sys.exit(1)

    table = container["data-table"]
    table = table[1:-1] # remove brackets around data

    return string_to_list(table)

def get_urls(webpage):
    """Find and return all encoded urls to download in the page source.

    Args:
        webpage: The parsed page (BeautifulSoup object) to search.

    Returns:
        A list of strings, one per encoded link, each holding the
        comma-delimited numbers taken from the link's data-enc attribute.
        Anchors whose data-enc attribute is missing or empty are skipped.
    """
    # Expects: <a href="" data-enc="{"url":[int,int, ... ,int]}"></a>
    encoded_links = webpage.find_all("a", {"href": ""})

    link_list = []
    for link in encoded_links:
        encoded = link.get("data-enc")
        if not encoded:
            # An empty href alone does not make it an encoded link.
            continue
        link_list.append(encoded[8:-2]) # Remove brackets + cruft

    return link_list

def decode_urls(encoded_urls, lookup_table):
    """Decode each url one by one and return a decoded list.

    Args:
        encoded_urls: Strings of comma-delimited numbers, one per link.
        lookup_table: List of ints mapping an index to an ascii code.

    Returns:
        A list of decoded urls, each prefixed with the madokami base url.
        The number of urls found is also printed.
    """
    url_base = "https://manga.madokami.com"
    url_list = []

    for url in encoded_urls:
        # Here's where the magic happens. First, we XOR the character in
        # the URL to the decimal 51. Then we use the resulting number as
        # the index in the lookup table to get the actual ascii value of
        # the character. Finally we convert that value to ascii.
        decoded_path = "".join(
            chr(lookup_table[character ^ 0x33])
            for character in string_to_list(url)
        )
        url_list.append(url_base + decoded_path)

    print("Found %d URL's" % len(url_list))

    return url_list

def write_urls(url_list, output):
    """Write out the urls to a file.

    Args:
        url_list: The urls to write, one per line.
        output: Path of the file to write; it is overwritten if it exists.

    Returns:
        None.
    """
    # The with-block closes the file even if a write fails part-way.
    with open(output, 'w') as out_file:
        out_file.writelines("%s\n" % url for url in url_list)

def main():
    """Get the input url, validate it, and get the webpage it points to. Then
    parse that page to obtain the download urls and write them to a file.

    Reads the url and the output file name from sys.argv. Prints a usage
    message and exits with status 1 unless exactly two arguments are given;
    exits with status 0 once the urls have been written.
    """
    if len(sys.argv) != 3:
        print("Usage: ./decode.py URL URL_FILE")
        sys.exit(1)

    input_url = sys.argv[1]
    output_file = sys.argv[2]

    validate_url(input_url)
    webpage = get_webpage(input_url)

    lookup_table = get_table(webpage)
    encoded_urls = get_urls(webpage)
    decoded_urls = decode_urls(encoded_urls, lookup_table)

    write_urls(decoded_urls, output_file)

    sys.exit(0)

# Credentials that get_webpage() sends with its request. They live at module
# level so that get_webpage() also finds them when this file is imported
# rather than run as a script.
USERNAME = ""
PASSWORD = ""

# Script entry point. A whitespace-mangled duplicate of the whole module used
# to follow this block and made the file unparseable; it has been removed.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("User sent keyboard interrupt. Exiting...")
        sys.exit(1)