Skip to content

Instantly share code, notes, and snippets.

@xymerone
Created June 2, 2020 07:40
Show Gist options
  • Save xymerone/1918e3ee07b69aeadbadba200be4ff86 to your computer and use it in GitHub Desktop.
Save xymerone/1918e3ee07b69aeadbadba200be4ff86 to your computer and use it in GitHub Desktop.
from bs4 import BeautifulSoup
from requests import get
from requests.exceptions import RequestException
def get_urls_sitemap(sitemap_url='', timeout=30):
    """Download a sitemap and return the page URLs it lists.

    The document at ``sitemap_url`` is fetched, parsed as XML, and the text
    of the first ``<loc>`` inside every ``<url>`` element is collected.

    Args:
        sitemap_url: Address of the sitemap XML document to download.
        timeout: Seconds to wait for the server before the request is
            abandoned; passed straight to ``requests.get``.

    Returns:
        A list of URL strings in document order. The list is empty when the
        sitemap contains no ``<url>`` entries, so callers can always extend
        another list with the result.

    Raises:
        requests.exceptions.RequestException: If the sitemap cannot be
            downloaded (connection error, timeout, ...).
    """
    print("Scan sitemap "+sitemap_url)
    response = get(sitemap_url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'xml')

    found = []
    for entry in soup.select('url'):
        locs = entry.select('loc')
        if not locs:
            # A <url> without <loc> carries nothing to test; skip it
            # instead of failing with IndexError.
            continue
        # Pretty-printed sitemaps often wrap the address in whitespace.
        location = locs[0].getText().strip()
        if location:
            found.append(location)
    return found
# Script body: ask for a sitemap URL, collect every page URL it references
# (following one level of <sitemap> index entries), then request each page
# and report the ones that do not answer with HTTP 200.

# Seconds to wait for any single HTTP request; without a timeout one stalled
# server would hang the whole run.
REQUEST_TIMEOUT = 30

URL = input("URL sitemap.xml: ")
RES_XML = get(URL, timeout=REQUEST_TIMEOUT).text
soup = BeautifulSoup(RES_XML, 'xml')
urls_res = []
sitemaps = soup.select('sitemap')
if len(sitemaps) > 0:
    # The document is a sitemap index: scan every child sitemap it names.
    print("Parsing sitenaos ")
    for sitemap in sitemaps:
        locs = sitemap.select('loc')
        if not locs:
            # Index entry without an address: nothing to scan.
            continue
        url = locs[0].getText().strip()
        # `or []` guards against a helper that reports "no URLs" as None.
        urls_res += get_urls_sitemap(url) or []
else:
    # Plain sitemap: the URL the user entered lists the pages directly.
    urls_res += get_urls_sitemap(URL) or []
print("Search urls: " + str(len(urls_res)))
print("Start test links...")
for u in urls_res:
    try:
        res = get(u, timeout=REQUEST_TIMEOUT)
    except RequestException as exc:
        # An unreachable link is exactly what this check is looking for;
        # report it and keep testing the remaining URLs.
        print("Problem in: " + u + " error: " + str(exc))
        continue
    if res.status_code != 200:
        print("Problem in: " + u + " code: "+ str(res.status_code))
print("Testing finish")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment