# mchlrhw/find_the_art.py

## find_the_art.py
#!/usr/bin/env python3


"""
A simple program to scrape the contents of permanent redirects to
"The Art" in order to track it down, before it can move on...
"""


import re
import requests
import sys
import time

from requests.exceptions import ConnectionError, SSLError


# Captures the numeric page id from a link rendered in a page body as
# ">permanent-redirect.xyz/pages/<digits><".  The dot in the host name is
# escaped so that it only matches a literal "." rather than any character.
LINK_REGEX = re.compile(r">permanent-redirect\.xyz/pages/([0-9]+)<")


def main():
    """Find a live page on permanent-redirect.xyz and follow its link chain.

    Phase one counts down from the current Unix timestamp, requesting
    ``/pages/<n>`` until one of them answers with HTTP 200.  Phase two
    repeatedly fetches the current page, extracts the next page number with
    ``LINK_REGEX`` and moves on to it.

    The process exits with status 0, after printing the page body, as soon
    as a fetched page does not contain the text "301 Permanent Redirect".
    It exits with status 1 when a redirect page holds no recognisable link,
    or when the budget of three failed requests has been used up.
    """
    path_suffix = int(time.time())

    url_template = "https://permanent-redirect.xyz/pages/{}"

    # Without a timeout a stalled server would block the script forever.
    timeout_seconds = 10
    # ReadTimeout does not derive from ConnectionError, so Timeout has to be
    # listed explicitly once a timeout is in use.
    request_errors = (ConnectionError, SSLError, requests.exceptions.Timeout)

    print("Attempting to get a foot on the ladder...")
    while True:
        url = url_template.format(path_suffix)
        print("Trying {}".format(url))
        try:
            res = requests.get(url, timeout=timeout_seconds)
        except request_errors:
            # `continue` skips the decrement below, so the same URL is
            # requested again after a failed attempt.
            print("Error during request, just keep moving on")
            continue

        if res.status_code == 200:
            break

        path_suffix -= 1

    print("\nOk, we're on. Now climb it to the top...")
    # The retry budget is shared by the whole climb; it is not reset after a
    # successful hop.
    retries = 3
    while retries > 0:
        url = url_template.format(path_suffix)
        print("Trying {}".format(url))
        try:
            res = requests.get(url, timeout=timeout_seconds)
        except request_errors as err:
            # Report the exception that actually occurred instead of always
            # claiming it was an SSLError.
            print("Error: {} during request".format(type(err).__name__))
            retries -= 1
            continue

        status_code = res.status_code
        if status_code != 200:
            print("Error: GET failed with {}".format(status_code))
            retries -= 1
            continue

        body = res.text
        if "301 Permanent Redirect" not in body:
            # No redirect notice on this page: show it and stop successfully.
            print(body)
            sys.exit(0)

        match = LINK_REGEX.search(body)
        if not match:
            print("Error: Could not find next link in page")
            print(body)
            sys.exit(1)

        # From here on path_suffix is the captured digit string, which
        # formats into the URL just like the initial integer did.
        path_suffix = match.group(1)

    # The loop above only falls through once every retry has been spent, so
    # report the failure rather than ending silently with status 0.
    print("Error: Giving up after repeated failed requests")
    sys.exit(1)


# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
	#!/usr/bin/env python3


	"""
	A simple program to scrape the contents of permanent redirects to
	"The Art" in order to track it down, before it can move on...
	"""


	import re
	import requests
	import sys
	import time

	from requests.exceptions import ConnectionError, SSLError


	# Captures the numeric page id from a link rendered in a page body as
	# ">permanent-redirect.xyz/pages/<digits><".  The dot in the host name is
	# escaped so that it only matches a literal "." rather than any character.
	LINK_REGEX = re.compile(r">permanent-redirect\.xyz/pages/([0-9]+)<")


	def main():
		"""Find a live page on permanent-redirect.xyz and follow its link chain.

		Phase one counts down from the current Unix timestamp, requesting
		``/pages/<n>`` until one of them answers with HTTP 200.  Phase two
		repeatedly fetches the current page, extracts the next page number
		with ``LINK_REGEX`` and moves on to it.

		The process exits with status 0, after printing the page body, as
		soon as a fetched page does not contain the text
		"301 Permanent Redirect".  It exits with status 1 when a redirect
		page holds no recognisable link, or when the budget of three failed
		requests has been used up.
		"""
		path_suffix = int(time.time())

		url_template = "https://permanent-redirect.xyz/pages/{}"

		# Without a timeout a stalled server would block the script forever.
		timeout_seconds = 10
		# ReadTimeout does not derive from ConnectionError, so Timeout has to
		# be listed explicitly once a timeout is in use.
		request_errors = (ConnectionError, SSLError, requests.exceptions.Timeout)

		print("Attempting to get a foot on the ladder...")
		while True:
			url = url_template.format(path_suffix)
			print("Trying {}".format(url))
			try:
				res = requests.get(url, timeout=timeout_seconds)
			except request_errors:
				# `continue` skips the decrement below, so the same URL is
				# requested again after a failed attempt.
				print("Error during request, just keep moving on")
				continue

			if res.status_code == 200:
				break

			path_suffix -= 1

		print("\nOk, we're on. Now climb it to the top...")
		# The retry budget is shared by the whole climb; it is not reset
		# after a successful hop.
		retries = 3
		while retries > 0:
			url = url_template.format(path_suffix)
			print("Trying {}".format(url))
			try:
				res = requests.get(url, timeout=timeout_seconds)
			except request_errors as err:
				# Report the exception that actually occurred instead of
				# always claiming it was an SSLError.
				print("Error: {} during request".format(type(err).__name__))
				retries -= 1
				continue

			status_code = res.status_code
			if status_code != 200:
				print("Error: GET failed with {}".format(status_code))
				retries -= 1
				continue

			body = res.text
			if "301 Permanent Redirect" not in body:
				# No redirect notice on this page: show it and stop
				# successfully.
				print(body)
				sys.exit(0)

			match = LINK_REGEX.search(body)
			if not match:
				print("Error: Could not find next link in page")
				print(body)
				sys.exit(1)

			# From here on path_suffix is the captured digit string, which
			# formats into the URL just like the initial integer did.
			path_suffix = match.group(1)

		# The loop above only falls through once every retry has been spent,
		# so report the failure rather than ending silently with status 0.
		print("Error: Giving up after repeated failed requests")
		sys.exit(1)


	# Run the scraper only when this file is executed as a script, not when
	# it is imported as a module.
	if __name__ == "__main__":
		main()