Created
January 15, 2014 15:00
-
-
Save davidallenfox/8437814 to your computer and use it in GitHub Desktop.
Converts a list of items (one per line in a text file) into chunked sitemap files. Run from the command line: the first argument is the text file; the second argument is the number of items to put in each chunk.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from itertools import islice | |
from lxml import etree, objectify | |
import sys | |
def main(filen, chunk):
    """Split a text file of items into numbered sitemap XML files.

    Reads *filen* line by line and writes every group of *chunk* lines to
    ``output_1.xml``, ``output_2.xml``, ... in the current working
    directory. Each line, with trailing whitespace stripped, becomes the
    text of one ``<url><loc>`` entry under a ``<urlset>`` root in the
    sitemaps.org 0.9 namespace.

    Args:
        filen: Path of the input text file, one item per line.
        chunk: Number of lines per output file. May be an int or a numeric
            string (it arrives as a string from the command line).

    Raises:
        ValueError: If *chunk* is not an integer or is less than 1.
    """
    # Parse and validate once, before touching the file system; a size of
    # zero would otherwise end the loop immediately and write nothing.
    chunk_size = int(chunk)
    if chunk_size < 1:
        raise ValueError("chunk must be a positive integer, got %r" % (chunk,))

    namespace = "http://www.sitemaps.org/schemas/sitemap/0.9"
    nsmap = {None: namespace}  # None key = default (unprefixed) namespace
    urlset_tag = "{%s}urlset" % namespace
    url_tag = "{%s}url" % namespace
    loc_tag = "{%s}loc" % namespace

    increment = 0
    with open(filen) as f:
        while True:
            # islice pulls at most chunk_size lines without reading the
            # whole file into memory; an empty list means end of file.
            next_n_lines = list(islice(f, chunk_size))
            if not next_n_lines:
                break

            increment += 1
            smf = "output_" + str(increment) + ".xml"

            urlset = etree.Element(urlset_tag, nsmap=nsmap)
            for line in next_n_lines:
                url = etree.SubElement(urlset, url_tag)
                loc = etree.SubElement(url, loc_tag)
                loc.text = line.rstrip()

            etree.ElementTree(urlset).write(
                smf,
                pretty_print=True,
                xml_declaration=True,
                encoding="UTF-8",
            )
if __name__ == "__main__":
    # Both positional arguments are required; exit with a usage line
    # instead of an IndexError traceback when one is missing.
    if len(sys.argv) < 3:
        sys.exit("usage: %s <items.txt> <chunk-size>" % sys.argv[0])
    main(sys.argv[1], sys.argv[2])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment