Last active
August 12, 2022 07:43
-
-
Save nomadrat/965657eda0633066146eb79029dd0d94 to your computer and use it in GitHub Desktop.
create sitemap. python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# this is part of https://abstractkitchen.com/blog/sitemaps-for-devs | |
# | |
import gzip
import os

try:
    from xml.etree import cElementTree
except ImportError:  # cElementTree was removed in Python 3.9
    from xml.etree import ElementTree as cElementTree
def add_url(root_node, url, lastmod=None):
    """Append a ``<url>`` entry to a sitemap node.

    Args:
        root_node: Parent element the new ``<url>`` element is attached to.
        url: Text stored in the ``<loc>`` child.
        lastmod: Text stored in the ``<lastmod>`` child. When ``None`` the
            child is left out instead of being written as an empty element.

    Returns:
        The newly created ``<url>`` element.
    """
    doc = cElementTree.SubElement(root_node, "url")
    cElementTree.SubElement(doc, "loc").text = url
    # An element whose text is None serializes as an empty <lastmod />,
    # so skip the element entirely when no date was supplied.
    if lastmod is not None:
        cElementTree.SubElement(doc, "lastmod").text = lastmod
    return doc
def add_url_image(url_node, image_url):
    """Attach an ``image:image`` entry to a ``<url>`` element.

    Args:
        url_node: Element that receives the new ``image:image`` child.
        image_url: Text stored in the nested ``image:loc`` element.

    Returns:
        The newly created ``image:image`` element.
    """
    wrapper = cElementTree.SubElement(url_node, "image:image")
    location = cElementTree.SubElement(wrapper, "image:loc")
    location.text = image_url
    return wrapper
def add_sitemap_url(root_node, sitemap_url):
    """Append a ``<sitemap>`` entry to a sitemap index node.

    Args:
        root_node: Element that receives the new ``<sitemap>`` child.
        sitemap_url: Text stored in the nested ``<loc>`` element.

    Returns:
        The newly created ``<sitemap>`` element.
    """
    entry = cElementTree.SubElement(root_node, "sitemap")
    location = cElementTree.SubElement(entry, "loc")
    location.text = sitemap_url
    return entry
def save_sitemap(root_node, save_as, **kwargs):
    """Serialize a sitemap tree to ``<save_as>.xml`` (or ``.xml.gz``).

    Args:
        root_node: Root element of the tree to write.
        save_as: Destination path without extension. It may be a bare file
            name, in which case the file is written relative to the current
            working directory.
        **kwargs: ``compress`` (default ``False``) - when true the output is
            gzip-compressed and ``.gz`` is appended to the file name.

    Returns:
        The file name that was written, including its extension but without
        the directory part.
    """
    compress = kwargs.get("compress", False)

    dest_path, base_name = os.path.split(save_as)
    sitemap_name = f"{base_name}.xml"
    if compress:
        sitemap_name = f"{sitemap_name}.gz"

    # An empty dest_path means "current directory": nothing to create, and
    # joining with "" keeps the target relative instead of pointing at "/".
    if dest_path:
        os.makedirs(dest_path, exist_ok=True)
    target = os.path.join(dest_path, sitemap_name)

    tree = cElementTree.ElementTree(root_node)
    if compress:
        # Context manager closes the gzip stream even if serialization fails;
        # same encoding/declaration as the uncompressed branch.
        with gzip.open(target, "wb") as gzipped_sitemap_file:
            tree.write(gzipped_sitemap_file, encoding="utf-8", xml_declaration=True)
    else:
        tree.write(target, encoding="utf-8", xml_declaration=True)

    return sitemap_name
# ===== create <urlset> tag aka regular sitemap =====

# create root XML node
sitemap_root = cElementTree.Element('urlset')
sitemap_root.attrib['xmlns'] = "http://www.sitemaps.org/schemas/sitemap/0.9"
# declare the namespace behind the "image:" prefix used by add_url_image
sitemap_root.attrib['xmlns:image'] = "http://www.google.com/schemas/sitemap-image/1.1"

# add urls
# keep the <url> element itself: a trailing comma here would wrap it in a tuple
# and add_url_image would fail when attaching the image node
url_1 = add_url(sitemap_root, "https://example.com/url-1", "2022-04-07")
add_url_image(url_1, "https://example.com/image-1.jpg")  # if you want to append image to your url

add_url(sitemap_root, "https://example.com/url-2", "2022-04-07")
add_url(sitemap_root, "https://example.com/url-3", "2022-04-07")

# save sitemap
save_sitemap(sitemap_root, "sitemaps/sitemap")
# ===== create <sitemapindex> tag =====
sitemap_index_root = cElementTree.Element('sitemapindex')
sitemap_index_root.attrib['xmlns'] = "http://www.sitemaps.org/schemas/sitemap/0.9"

# append links to other sitemaps
add_sitemap_url(sitemap_index_root, "https://example.com/sitemap1.xml")
add_sitemap_url(sitemap_index_root, "https://example.com/sitemap2.xml")

# save the index under its own name so it does not overwrite sitemaps/sitemap.xml
save_sitemap(sitemap_index_root, "sitemaps/sitemap-index")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment