Skip to content

Instantly share code, notes, and snippets.

@hamletbatista
Created February 28, 2019 21:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hamletbatista/4520222c1b7fb43b99f655f5cefd4597 to your computer and use it in GitHub Desktop.
Save hamletbatista/4520222c1b7fb43b99f655f5cefd4597 to your computer and use it in GitHub Desktop.
crawl redirects
def get_redirects(url, timeout=10):
    """Probe *url* with one HEAD request and report where it redirects.

    Redirects are not followed (``requests.head`` leaves
    ``allow_redirects`` off by default), so the status reported is that of
    *url* itself rather than of its final destination.

    Args:
        url: Address to probe.
        timeout: Seconds to wait for the server before giving up. Pass
            ``None`` to wait indefinitely.

    Returns:
        A 3-tuple ``(url, status_code, location)``:

        * redirect status (301, 302, 303, 307, 308): ``location`` is the
          ``Location`` header, or ``None`` if the server omitted it;
        * any other status, 404 included: ``location`` is ``None``;
        * the request itself failed: ``(url, None, "Error")``.
    """
    try:
        # HEAD is sufficient: only the status line and headers are inspected.
        r = requests.head(url, timeout=timeout)
    except requests.exceptions.RequestException:
        # Only request-level failures (DNS, connection, timeout, bad URL) are
        # turned into an "Error" row; Ctrl-C and programming errors propagate.
        return (url, None, "Error")
    if r.status_code in (301, 302, 303, 307, 308):
        # .get() keeps a redirect without a Location header from raising.
        return (url, r.status_code, r.headers.get("Location"))
    return (url, r.status_code, None)
# Module-level accumulator: crawl_redirects() appends to this list, so rows
# gathered so far remain available here and build up across calls.
results = []


def crawl_redirects(urls, sleep_time=.15):
    """Check every URL in *urls* for redirects, pausing between requests.

    Each URL is passed to ``get_redirects`` and the returned tuple is
    appended to the module-level ``results`` list. Progress is printed for
    item 0 and every 1000th item after it.

    Args:
        urls: Iterable of URLs to check.
        sleep_time: Seconds to sleep after each request.

    Returns:
        The module-level ``results`` list itself (not a copy). It also
        holds rows appended by earlier calls.
    """
    # No ``global`` statement needed: the list is mutated, never rebound.
    for i, url in enumerate(urls):
        result = get_redirects(url)
        results.append(result)
        if i % 1000 == 0:
            print(i, ":", result)
        time.sleep(sleep_time)
    return results
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment