Basic Sitemap Generator
#!/usr/bin/env python3 | |
# The MIT License (MIT) | |
# | |
# Copyright (c) 2020 novov | |
# | |
# Permission is hereby granted, free of charge, to any person obtaining | |
# a copy of this software and associated documentation files (the "Software"), to deal in the | |
# Software without restriction, including without limitation the rights to use, copy, modify, merge, | |
# publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom | |
# the Software is furnished to do so, subject to the following conditions: | |
# | |
# The above copyright notice and this permission notice shall be included in all copies or | |
# substantial portions of the Software. | |
# | |
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT | |
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, | |
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT | |
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
import json
import os
from xml.sax.saxutils import escape
def searchHTML(directory, priorities, auto, root="/"):
    """Recursively collect the HTML pages under *directory* with their priorities.

    Args:
        directory: Filesystem path of the directory to scan. A trailing path
            separator is optional.
        priorities: Mapping from site-relative path (``root`` + entry name,
            e.g. ``"/about.html"`` or ``"/drafts"`` for a directory) to an
            explicit priority. Entries that are not listed get a default.
        auto: When true, entries directly under ``"/"`` default to 0.6
            instead of 0.5, and every ``index.html`` gets an extra 0.1.
        root: Site-relative prefix that corresponds to *directory*; it is
            prepended to each entry name to form the result keys.

    Returns:
        A dict mapping the site-relative path of every ``*.html`` entry to
        its priority, clamped to the range 0..1. Entries are visited in name
        order, so the dict order is reproducible.
    """
    base_priority = 0.6 if root == "/" and auto else 0.5
    result = {}
    with os.scandir(directory) as entries:
        # scandir yields entries in arbitrary order; sorting keeps the
        # generated sitemap stable from one run to the next.
        for entry in sorted(entries, key=lambda e: e.name):
            name = root + entry.name
            priority = priorities.get(name, base_priority)
            # A priority below 0.1 excludes the entry; for a directory this
            # skips everything underneath it as well.
            if priority < 0.1:
                continue
            if auto and entry.name == "index.html":
                priority += 0.1
            if entry.name.endswith(".html"):
                result[name] = max(min(1, priority), 0)
            elif entry.is_dir():
                # entry.path joins directory and name with the right
                # separator, whether or not directory ends with one.
                result.update(
                    searchHTML(entry.path, priorities, auto, name + "/")
                )
    return result
def clamp(n, minn, maxn):
    """Return *n* limited to the range from *minn* to *maxn*."""
    # Cap at the upper bound first, then raise to the lower bound.
    capped = n if n < maxn else maxn
    return minn if minn > capped else capped
def _render_sitemap(site_url, pages):
    """Return the sitemap XML document listing *pages* under *site_url*."""
    # The sitemap protocol requires &, <, >, ' and " to be entity-escaped in
    # URLs; escape() covers the first three, the mapping adds the quotes.
    quote_entities = {"'": "&apos;", '"': "&quot;"}
    entry = "\n<url>\n<loc>{0}</loc>\n<priority>{1:.1f}</priority>\n</url>"
    parts = [
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
    ]
    parts.extend(
        entry.format(escape(site_url + name, quote_entities), priority)
        for name, priority in pages.items()
    )
    parts.append("\n</urlset>")
    return "".join(parts)


def main():
    """Generate the sitemap described by ``sitemap.json`` next to this script.

    The configuration file must provide the keys ``site_path``,
    ``priorities``, ``automate_priorities``, ``site_url`` and
    ``output_path``. The sitemap is written to ``site_path`` followed
    directly by ``output_path``.

    Returns:
        -1 when ``sitemap.json`` does not exist, otherwise ``None``.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    parampath = os.path.join(script_dir, "sitemap.json")
    if not os.path.isfile(parampath):
        return -1
    with open(parampath, "r", encoding="utf-8") as f:
        params = json.load(f)

    pages = searchHTML(
        params["site_path"],
        params["priorities"],
        params["automate_priorities"],
    )
    # Plain concatenation is kept on purpose: os.path.join would discard
    # site_path if output_path started with a separator.
    output_path = params["site_path"] + params["output_path"]
    # The XML declaration says UTF-8, so write the file as UTF-8 rather than
    # in the platform's default encoding.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(_render_sitemap(params["site_url"], pages))
if __name__ == '__main__':
    # Pass main()'s return value on as the exit status, so the -1 it returns
    # for a missing sitemap.json is visible to the caller; None exits with 0.
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment