# Source: GitHub gist nadavoosh/ead50d618baf879aeeee34bb036a5b7c
# Author: @nadavoosh (last active January 10, 2018 02:49)
import requests
import sys
import webbrowser
from bs4 import BeautifulSoup
# Notice text that find_art searches for in a page body; a page containing it
# is treated as a forwarding page rather than as the art itself.
HAS_MOVED = 'This art has moved to a new URL. The art is now visible at:'
def get_link_to_next_art(soup):
    """Return the URL named by the first link on a parsed page.

    The address is taken from the link's visible text, not from its
    ``href`` attribute.

    Args:
        soup: Parsed page (a ``BeautifulSoup`` document or any object with a
            compatible ``find`` method).

    Returns:
        The link text with surrounding whitespace removed. ``https://`` is
        prepended unless the text already starts with ``http://`` or
        ``https://``.

    Raises:
        IndexError: If the page contains no ``<a>`` element. The exception
            type matches what indexing an empty result list used to raise.
    """
    anchor = soup.find("a")
    if anchor is None:
        raise IndexError(
            'No link found on the page; cannot locate the next art URL.'
        )
    # Link text scraped from HTML often carries newlines or padding that
    # would otherwise end up inside the URL.
    target = anchor.get_text().strip()
    # Avoid producing "https://https://..." when the text is already a URL.
    if target.lower().startswith(('http://', 'https://')):
        return target
    return 'https://{}'.format(target)
def load_soup(url, timeout=10):
    """Download a page and parse it with BeautifulSoup's lxml parser.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely on a server
            that never responds.

    Returns:
        The parsed document as a ``BeautifulSoup`` object.

    Raises:
        requests.exceptions.RequestException: If the request fails or times
            out.
    """
    # The HTTP status is deliberately not checked: the body is parsed whatever
    # status the server returned.
    page = requests.get(url, timeout=timeout)
    return BeautifulSoup(page.content, "lxml")
def find_art(seed_url, count=1):
    """Follow "has moved" notices from a page until the art is found.

    Each visited page is searched for the ``HAS_MOVED`` notice. While the
    notice is present, the first link on the page is followed. The first
    page without the notice is reported as the art's location, after which
    that page is fetched again and its first link is opened in the browser.

    Args:
        seed_url: Address where the art was last seen.
        count: 1-based number of the page being checked; used only for
            progress output.

    Raises:
        RecursionError: If the chain of forwarding pages is longer than the
            interpreter's recursion limit. The script entry point relies on
            this to report that the trail is too long.
    """
    # Report progress on every tenth page only, to keep the output readable.
    if count % 10 == 0:
        print('Checking {}'.format(seed_url))
        print('Checked {} links so far'.format(count))

    soup = load_soup(seed_url)
    if soup.body.find_all(string=HAS_MOVED):
        # Still a forwarding page: continue with the address it points to.
        find_art(get_link_to_next_art(soup), count + 1)
        return

    print('Found the art at {}!'.format(seed_url))
    # race condition here, this next call might not actually be the art :/
    # NOTE(review): the page is deliberately fetched a second time and its
    # first link is opened; presumably this picks up a move that happened
    # since the check above - confirm before reusing `soup` here.
    webbrowser.open(get_link_to_next_art(load_soup(seed_url)))
if __name__ == '__main__':
    # Usage: pass the URL where the art was last seen as the first argument.
    if len(sys.argv) < 2:
        # sys.exit with a string prints it to stderr and exits with status 1,
        # which gives a clean usage error instead of a traceback.
        sys.exit('Please provide a URL where you have seen the art.')
    try:
        find_art(sys.argv[1], 1)
    except RecursionError:
        # find_art recurses once per forwarding page, so a very long trail
        # exhausts the recursion limit.
        print('The world is changing too quickly. Try again with a more recent art location.')
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment