Skip to content

Instantly share code, notes, and snippets.

@davidallenfox
Created January 15, 2014 15:00
Show Gist options
  • Save davidallenfox/8437814 to your computer and use it in GitHub Desktop.
Save davidallenfox/8437814 to your computer and use it in GitHub Desktop.
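Converts a text file of items (e.g. URLs, one per line) into a chunked sitemap: a series of sitemap XML files named output_1.xml, output_2.xml, and so on. Run it on the command line; the first argument is the path to the text file, and the second argument is the chunk size, i.e. the maximum number of entries written to each output file.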
from itertools import islice
from lxml import etree, objectify
import sys
def main(filen, chunk):
    """Split a text file of URLs into numbered sitemap XML files.

    Reads ``filen`` line by line and writes ``output_1.xml``,
    ``output_2.xml``, ... relative to the current working directory. Each
    file holds a ``<urlset>`` root in the sitemaps.org 0.9 namespace with one
    ``<url><loc>...</loc></url>`` entry per non-blank input line, and at most
    ``chunk`` entries per file.

    Args:
        filen: Path of the text file to read, one entry per line.
        chunk: Maximum number of entries per output file. Anything ``int()``
            accepts is allowed (the command line passes a string).

    Raises:
        ValueError: If ``chunk`` is not an integer or is smaller than 1.
    """
    # Convert and validate once, before any file is touched. A size of zero
    # would otherwise end the loop on the first pass and silently write
    # nothing.
    size = int(chunk)
    if size < 1:
        raise ValueError(
            "chunk must be a positive integer, got {!r}".format(chunk)
        )

    # Loop-invariant XML names, built once instead of once per chunk.
    namespace = "http://www.sitemaps.org/schemas/sitemap/0.9"
    nsmap = {None: namespace}  # None key: declare it as the default namespace
    urlset_tag = "{%s}urlset" % namespace
    url_tag = "{%s}url" % namespace
    loc_tag = "{%s}loc" % namespace

    # NOTE(review): the input is opened with the platform default encoding
    # while the output is always written as UTF-8 -- confirm that matches
    # the files this is run on.
    with open(filen) as f:
        # Strip trailing whitespace/newlines lazily and drop blank lines up
        # front, so they neither become empty <loc> elements nor count
        # toward a chunk.
        stripped = (line.rstrip() for line in f)
        entries = (entry for entry in stripped if entry)

        file_number = 0
        while True:
            batch = list(islice(entries, size))
            if not batch:
                break
            file_number += 1

            urlset = etree.Element(urlset_tag, nsmap=nsmap)
            for entry in batch:
                url = etree.SubElement(urlset, url_tag)
                loc = etree.SubElement(url, loc_tag)
                loc.text = entry

            out_name = "output_" + str(file_number) + ".xml"
            etree.ElementTree(urlset).write(
                out_name,
                pretty_print=True,
                xml_declaration=True,
                encoding="UTF-8",
            )
if __name__ == "__main__":
    # Script entry point: argv[1] is the input text file, argv[2] the chunk
    # size. Exit with a usage message instead of an IndexError traceback when
    # either is missing; any extra arguments are ignored.
    if len(sys.argv) < 3:
        sys.exit("usage: {} <text-file> <chunk-size>".format(sys.argv[0]))
    main(sys.argv[1], sys.argv[2])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment