Last active
January 10, 2018 16:20
-
-
Save mchlrhw/4864b437f79ed2d3d691be5b0e3a8cfb to your computer and use it in GitHub Desktop.
Find the Art - A solution to finding the art page for Permanent Redirect by Donald Hanson
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
A simple program to scrape the contents of permanent redirects to | |
"The Art" in order to track it down, before it can move on... | |
""" | |
import re | |
import requests | |
import sys | |
import time | |
from requests.exceptions import ConnectionError, SSLError | |
# Captures the numeric page id from link text of the form
# ">permanent-redirect.xyz/pages/<digits><" found in a page body.
# The dot is escaped so that only the literal host name matches.
LINK_REGEX = re.compile(r">permanent-redirect\.xyz/pages/([0-9]+)<")
def main():
    """Track down "The Art" by following the chain of redirect pages.

    Phase one probes ``/pages/<n>`` starting at the current Unix time and
    counting down one second at a time until a page answers with HTTP 200.
    Phase two extracts the next page id from each page body (via
    ``LINK_REGEX``) and follows it until a page no longer contains the text
    "301 Permanent Redirect"; that body is printed and the process exits
    with status 0.

    Exits with status 1 if a redirect page contains no next link, or if
    three attempts in a row at the same page fail while following the chain.
    """
    url_template = "https://permanent-redirect.xyz/pages/{}"
    # Without a timeout a stalled connection would block the script forever.
    timeout = 10
    # Read timeouts are not ConnectionErrors, so they are listed explicitly.
    request_errors = (ConnectionError, SSLError, requests.exceptions.Timeout)

    # Start from "now" and walk backwards; the first loop treats the path
    # suffix as a second-resolution timestamp.
    path_suffix = int(time.time())

    print("Attempting to get a foot on the ladder...")
    while True:
        url = url_template.format(path_suffix)
        print("Trying {}".format(url))
        try:
            res = requests.get(url, timeout=timeout)
        except request_errors:
            print("Error during request, just keep moving on")
            # Any existing page works as a starting point, so skip this
            # candidate instead of retrying the same URL indefinitely.
            path_suffix -= 1
            continue
        if res.status_code == 200:
            break
        path_suffix -= 1

    print("\nOk, we're on. Now climb it to the top...")
    max_retries = 3
    retries = max_retries
    while retries > 0:
        url = url_template.format(path_suffix)
        print("Trying {}".format(url))
        try:
            res = requests.get(url, timeout=timeout)
        except request_errors as err:
            # Report the error that actually occurred, not always SSLError.
            print("Error: {} during request".format(type(err).__name__))
            retries -= 1
            continue
        status_code = res.status_code
        if status_code != 200:
            print("Error: GET failed with {}".format(status_code))
            retries -= 1
            continue

        # A successful fetch replenishes the budget: failures are counted
        # per page, not across the whole chain.
        retries = max_retries

        body = res.text
        if "301 Permanent Redirect" not in body:
            # Not a redirect page any more: this is the end of the trail.
            print(body)
            sys.exit(0)
        match = LINK_REGEX.search(body)
        if not match:
            print("Error: Could not find next link in page")
            print(body)
            sys.exit(1)
        # From here on the suffix is the captured digit string, which
        # formats into the URL the same way the initial int did.
        path_suffix = match.group(1)

    # Only reached when every retry for the current page has failed.
    print("Error: Giving up on {} after {} failed attempts".format(
        url, max_retries))
    sys.exit(1)
# Run the scraper only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
It ain't pretty, but I don't want to spend too much time on it. Feel free to improve on my approach. The main insight is that each path ends with the Unix epoch timestamp of when the page was created, so find a redirect page created close to now (in seconds since 1970) and follow the trail from there. Not extensively tested, so YMMV.