Last active
January 10, 2018 02:49
-
-
Save nadavoosh/ead50d618baf879aeeee34bb036a5b7c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import sys | |
import webbrowser | |
from bs4 import BeautifulSoup | |
# Notice text that find_art searches a page body for; when it is present the
# page is treated as a redirect notice and its first link is followed.
HAS_MOVED = "This art has moved to a new URL. The art is now visible at:"
def get_link_to_next_art(soup):
    """Return the URL named by the first link on a parsed page.

    The visible text of the first ``<a>`` element is used as the address.
    Surrounding whitespace is stripped, and ``https://`` is prepended unless
    the text already begins with an HTTP(S) scheme.

    Args:
        soup: Parsed page exposing ``find`` (for example a ``BeautifulSoup``
            object).

    Returns:
        The URL as a string.

    Raises:
        IndexError: If the page contains no ``<a>`` element.
    """
    anchor = soup.find("a")
    if anchor is None:
        # IndexError matches what a plain ``[0]`` lookup on an empty result
        # list raises, but the message names what is actually missing.
        raise IndexError('No link found on the page.')
    target = anchor.getText().strip()
    # Avoid producing "https://https://..." when the link text is a full URL.
    if target.lower().startswith(('http://', 'https://')):
        return target
    return 'https://{}'.format(target)
def load_soup(url, timeout=30):
    """Fetch a page over HTTP and parse it.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely, so one stalled
            server would hang the whole search.

    Returns:
        A ``BeautifulSoup`` tree built from the response body with the
        ``lxml`` parser.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    response = requests.get(url, timeout=timeout)
    return BeautifulSoup(response.content, "lxml")
def find_art(seed_url, count):
    """Follow "has moved" notices from ``seed_url`` and open the result.

    Each page is loaded and its body searched for the ``HAS_MOVED`` text.
    While the notice is present, the first link on the page is followed
    recursively with ``count`` incremented. Once a page without the notice
    is reached, that page is loaded again and the first link on the fresh
    copy is opened in the web browser.

    Args:
        seed_url: URL to check first.
        count: Number of links checked so far, including this one; a
            progress report is printed whenever it is a multiple of ten.
    """
    if count % 10 == 0:
        print('Checking {}'.format(seed_url))
        print('Checked {} links so far'.format(count))

    page = load_soup(seed_url)
    moved_notices = page.body.findAll(text=HAS_MOVED)

    if not moved_notices:
        print('Found the art at {}!'.format(seed_url))
        # race condition here, this next call might not actually be the art :/
        fresh_page = load_soup(seed_url)
        webbrowser.open(get_link_to_next_art(fresh_page))
        return

    # Still a redirect notice: hop to the address it points at.
    next_url = get_link_to_next_art(page)
    find_art(next_url, count + 1)
if __name__ == '__main__':
    # Everything after the script name; the first entry is the starting URL.
    cli_args = sys.argv[1:]
    if not cli_args:
        raise Exception('Please provide a URL where you have seen the art.')
    try:
        find_art(cli_args[0], 1)
    except RecursionError:
        # find_art recurses once per followed link, so a long enough chain of
        # moves exhausts the interpreter's recursion limit.
        print('The world is changing too quickly. Try again with a more recent art location.')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment