Skip to content

Instantly share code, notes, and snippets.

@diije
Created December 1, 2022 15:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save diije/0d4d45d8c17727f33e89152b70622d47 to your computer and use it in GitHub Desktop.
Save diije/0d4d45d8c17727f33e89152b70622d47 to your computer and use it in GitHub Desktop.
Python script to generate an XML sitemap from a list of URLs
import argparse
import time
from xml.sax.saxutils import escape

import requests  # pip install requests
from validator_collection import validators, errors  # pip install validator-collection
def main(args):
    """Generate an XML sitemap from a text file containing one URL per line.

    Each non-blank line of ``args.input`` is validated as a URL and then
    fetched with an HTTP GET. Only URLs that answer with status code 200 are
    written, as ``<url><loc>...</loc></url>`` entries, to ``args.output``.
    Invalid URLs, failed requests and non-200 responses are reported on
    stdout and left out of the sitemap.

    Args:
        args: Namespace-like object providing:
            input: path of the text file listing the URLs.
            output: path of the sitemap file to write.
            delay: seconds to sleep after each request.
            timeout (optional): seconds to wait for each HTTP response;
                defaults to 10 when the attribute is absent.
    """
    # Without a timeout a single unresponsive server would block forever.
    request_timeout = getattr(args, "timeout", 10)

    with open(args.input) as f:
        # Drop line endings before validation and ignore blank lines.
        urls = [line.strip() for line in f if line.strip()]

    sitemap_urls = []
    for url in urls:
        try:
            # Check if URL is valid
            url = validators.url(url)
        except errors.InvalidURLError:
            print('Invalid URL: {}'.format(url))
            # No request was made, so there is nothing to throttle.
            continue

        try:
            # Check if URL response code is 200
            r = requests.get(url, timeout=request_timeout)
        except requests.RequestException as exc:
            # One unreachable host must not abort the whole run.
            print(f"URL {url} could not be fetched: {exc}")
        else:
            if r.status_code == 200:
                # The sitemap protocol requires entity-escaped URLs
                # (&, <, >, ' and ").
                loc = escape(url, {"'": "&apos;", '"': "&quot;"})
                sitemap_urls.append(f"<url>\n <loc>{loc}</loc> \n</url>\n")
            else:
                print(f"URL {url} returned {r.status_code} status code")

        # Wait `delay` seconds before next request
        time.sleep(args.delay)

    entries = "".join(sitemap_urls).strip()
    xml_sitemap = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
        f"{entries}\n"
        "</urlset>"
    )

    # Write with the same encoding the XML declaration announces.
    with open(args.output, 'w', encoding='utf-8') as f:
        f.write(xml_sitemap)
if __name__ == '__main__':
    # Command-line entry point: declare the three options from a table of
    # (flag, type, default), parse the command line, and hand the resulting
    # namespace to main().
    cli = argparse.ArgumentParser()
    option_table = (
        ('--input', str, 'urls.txt'),
        ('--output', str, 'sitemap.xml'),
        ('--delay', float, 0.5),
    )
    for flag, value_type, fallback in option_table:
        cli.add_argument(flag, type=value_type, default=fallback)
    main(cli.parse_args())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment