Skip to content

Instantly share code, notes, and snippets.

@mirnovov

mirnovov/sitemap.py Secret

Created Nov 1, 2020
Embed
What would you like to do?
Basic Sitemap Generator
#!/usr/bin/env python3
# The MIT License (MIT)
#
# Copyright (c) 2020 novov
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the "Software"), to deal in the
# Software without restriction, including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom
# the Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import json
import os
import sys
from xml.sax.saxutils import escape
def searchHTML(directory, priorities, auto, root="/"):
    """Recursively collect the ``.html`` entries under *directory*.

    Args:
        directory: Filesystem directory to scan. A trailing path separator
            is optional.
        priorities: Mapping from site-relative name (``root`` + entry name,
            e.g. ``"/about.html"`` or ``"/blog"``) to an explicit priority.
            An entry whose priority is below 0.1 is skipped; for a
            directory this skips its whole subtree.
        auto: When true, entries directly under the top-level root ``"/"``
            default to 0.6 instead of 0.5, and every ``index.html`` gets
            an extra 0.1.
        root: Site-relative prefix corresponding to *directory*. It is
            concatenated directly with entry names, so it should end
            with ``"/"``.

    Returns:
        A dict mapping each site-relative ``.html`` path to its priority,
        clamped to the range 0..1. Entries are visited in name order so
        the result is reproducible across runs and filesystems.
    """
    result = {}
    base_priority = 0.6 if root == "/" and auto else 0.5
    with os.scandir(directory) as entries:
        # os.scandir yields entries in arbitrary order; sort for stable output.
        for entry in sorted(entries, key=lambda item: item.name):
            name = root + entry.name
            priority = priorities.get(name, base_priority)
            if priority < 0.1:
                continue
            if auto and entry.name == "index.html":
                priority += 0.1
            # Clamp to the valid sitemap priority range 0..1.
            priority = max(min(1, priority), 0)
            if entry.name.endswith(".html"):
                result[name] = priority
            elif entry.is_dir():
                # entry.path joins with the proper separator, so the scan
                # works whether or not *directory* ends with one.
                result.update(
                    searchHTML(entry.path, priorities, auto, name + "/")
                )
    return result
def clamp(n, minn, maxn):
    """Return *n* limited to the range *minn*..*maxn*.

    The upper bound is applied first, then the lower bound, so when the
    bounds are inverted (``minn > maxn``) the result is *minn*.
    """
    # Cap at the upper bound; on a tie the bound itself is returned.
    capped = n if n < maxn else maxn
    # Then raise to the lower bound if the capped value fell below it.
    return minn if minn > capped else capped
def _render_sitemap(site_url, pages):
    """Return the sitemap XML document for *pages* as a string.

    *pages* maps site-relative paths to priorities. Each ``<loc>`` is
    ``site_url`` followed by the path, XML-escaped so that ``&``, ``<``
    and ``>`` in a URL cannot produce a malformed document.
    """
    parts = [
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
    ]
    for name, priority in pages.items():
        location = escape("{0}{1}".format(site_url, name))
        parts.append(
            "\n<url>\n<loc>{0}</loc>\n<priority>{1:.1f}</priority>\n</url>".format(
                location, priority
            )
        )
    parts.append("\n</urlset>")
    return "".join(parts)


def main():
    """Generate the sitemap described by ``sitemap.json`` next to this script.

    The configuration is a JSON object with the keys ``site_path`` (directory
    to scan), ``site_url`` (prefix for every ``<loc>``), ``priorities``
    (explicit per-path priorities), ``automate_priorities`` (flag passed to
    ``searchHTML``) and ``output_path`` (file to write, relative to
    ``site_path``).

    Returns:
        -1 if the configuration file does not exist, otherwise ``None``
        after the sitemap has been written.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    config_path = os.path.join(script_dir, "sitemap.json")
    if not os.path.isfile(config_path):
        print(
            "sitemap: configuration file not found: " + config_path,
            file=sys.stderr,
        )
        return -1
    with open(config_path, "r", encoding="utf-8") as f:
        params = json.load(f)

    pages = searchHTML(
        params["site_path"],
        params["priorities"],
        params["automate_priorities"],
    )

    # Join rather than concatenate so site_path works with or without a
    # trailing separator; leading separators on output_path are dropped so
    # it stays relative to site_path, as plain concatenation treated it.
    output_path = os.path.join(
        params["site_path"],
        params["output_path"].lstrip("/" + os.sep),
    )
    # The XML prolog declares UTF-8, so the file must be written as UTF-8
    # regardless of the platform's default encoding.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(_render_sitemap(params["site_url"], pages))
if __name__ == '__main__':
    # Propagate main()'s return value (-1 when the configuration file is
    # missing, None on success) as the process exit status instead of
    # discarding it.
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment