Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
crawl redirects
def get_redirects(url, timeout=10):
    """Probe *url* with a HEAD request and report whether it redirects.

    Args:
        url: Address to probe.
        timeout: Seconds to wait for the server before giving up; passed
            straight through to ``requests.head``. Without a timeout a
            single unresponsive host would stall the caller indefinitely.

    Returns:
        A 3-tuple ``(url, status_code, target)``:

        * ``(url, None, "Error")`` when the request itself failed
          (connection problem, timeout, malformed URL, ...).
        * ``(url, status_code, location)`` for a redirect status, where
          ``location`` is the ``Location`` header, or ``None`` if the
          server did not send one.
        * ``(url, status_code, None)`` for every other status.
    """
    redirect_statuses = (301, 302, 303, 307, 308)
    try:
        # HEAD avoids downloading response bodies. Redirects are explicitly
        # left unfollowed so the redirect response itself can be inspected.
        r = requests.head(url, allow_redirects=False, timeout=timeout)
    except Exception:
        # Best-effort probing: any per-URL failure is reported, not raised.
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate so a long-running crawl can still be stopped.
        return (url, None, "Error")
    if r.status_code in redirect_statuses:
        # .get() tolerates a redirect response that lacks a Location header.
        return (url, r.status_code, r.headers.get('Location'))
    return (url, r.status_code, None)
# Module-level accumulator: crawl_redirects() appends to this list, so
# outcomes gathered so far remain available here even if a crawl is cut
# short, and successive crawls keep adding to the same list.
results = []


def crawl_redirects(urls, sleep_time=.15):
    """Probe every URL in *urls* in order, pausing between requests.

    Each outcome from ``get_redirects`` is appended to the module-level
    ``results`` list. Every 1000th URL (starting with the first) is echoed
    to stdout as a progress marker.

    Args:
        urls: Iterable of URLs to probe.
        sleep_time: Seconds to sleep after each request.

    Returns:
        The module-level ``results`` list itself (not a copy), which also
        holds anything appended by earlier calls.
    """
    # No ``global`` statement is needed: the list is mutated in place and
    # the name ``results`` is never rebound.
    for index, target in enumerate(urls):
        outcome = get_redirects(target)
        results.append(outcome)
        if not index % 1000:
            print(index, ":", outcome)
        time.sleep(sleep_time)
    return results
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.