Skip to content

Instantly share code, notes, and snippets.

@justindavies
Created July 5, 2019 06:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save justindavies/f260916acca3f9d30f78e4a7594b0fa6 to your computer and use it in GitHub Desktop.
Save justindavies/f260916acca3f9d30f78e4a7594b0fa6 to your computer and use it in GitHub Desktop.
import urllib, json
import glob
import os
import pymongo
from pymongo import MongoClient
# Open a MongoDB client from the value of the DB environment variable and
# collect the filings that will be listed in the sitemaps.
client = MongoClient(os.environ["DB"])
db = client.fundy
docs = db.docs

# Project only the three fields that are inspected or copied below.
filings_cursor = docs.find({}, {"filing": 1, "length": 1, "date": 1})

# Keep documents that carry a "length" greater than 1000, reduced to the two
# fields needed later ("filing" and "date").
filings = [
    {"filing": doc["filing"], "date": doc["date"]}
    for doc in filings_cursor
    if "length" in doc and doc["length"] > 1000
]
print("Gathered")
# Write the gathered filings out as numbered sitemap files ("1.xml",
# "2.xml", ...) in the current working directory.

# Number of <url> entries per sitemap file. This keeps the script's existing
# batch size, which stays under the sitemaps.org limit of 50,000 URLs per file.
URLS_PER_SITEMAP = 49999


def render_url_entry(filing):
    """Return the ``<url>`` element for one filing as a string.

    ``filing`` is a mapping with a "filing" key (appended to the report URL)
    and a "date" key (emitted as ``<lastmod>`` via ``str()``).
    """
    # Local import so this block does not depend on the file's import header.
    from xml.sax.saxutils import escape

    # Entity-escape interpolated text so characters such as "&" or "<" cannot
    # produce malformed XML; ordinary values pass through unchanged.
    # NOTE(review): str() of a datetime yields "YYYY-MM-DD HH:MM:SS", which is
    # not the W3C Datetime format sitemaps expect -- confirm the stored type
    # of "date" and format it explicitly if it is a datetime.
    return (
        "<url>\n"
        "<loc>http://inkl.in/report/" + escape(filing["filing"]) + "</loc>\n"
        "<changefreq>weekly</changefreq>"
        "<lastmod>" + escape(str(filing["date"])) + "</lastmod>"
        "</url>\n"
    )


def write_sitemap(path, entries):
    """Write the rendered ``<url>`` strings in ``entries`` to ``path``."""
    # "with" guarantees the handle is closed even if a write fails.
    with open(path, "w") as out:
        out.write('<?xml version="1.0" encoding="UTF-8"?>\n')
        out.write('<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n')
        out.writelines(entries)
        out.write("</urlset>")


filename = 0
batch = []
for filing in filings:
    batch.append(render_url_entry(filing))
    if len(batch) == URLS_PER_SITEMAP:
        filename += 1
        print("Writing " + str(filename))
        write_sitemap(str(filename) + ".xml", batch)
        batch = []

# Flush the final, partially filled batch. Without this, every URL after the
# last full batch is dropped (and no file at all is produced when there are
# fewer than URLS_PER_SITEMAP filings).
if batch:
    filename += 1
    print("Writing " + str(filename))
    write_sitemap(str(filename) + ".xml", batch)
    batch = []
# Build the sitemap index ("map.xml") that points at every sitemap file in
# the current working directory, write it out, and echo it to stdout.


def build_sitemap_index(filenames, index_name="map.xml"):
    """Return sitemap-index XML listing each sitemap in ``filenames``.

    ``index_name`` is skipped: the index file itself matches ``*.xml`` once
    it exists from an earlier run, and an index must not list itself as a
    sitemap. Names are emitted in sorted order so the output is
    deterministic regardless of directory listing order.
    """
    # Local import so this block does not depend on the file's import header.
    from xml.sax.saxutils import escape

    parts = [
        '<?xml version="1.0" encoding="UTF-8"?>\n',
        '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n',
    ]
    for name in sorted(filenames):
        if name == index_name:
            continue
        parts.append('<sitemap>\n')
        # Entity-escape the name so unusual file names cannot break the XML.
        parts.append(' <loc>http://inkl.in/sitemaps/' + escape(name) + '</loc>\n')
        parts.append('</sitemap>\n')
    parts.append('</sitemapindex>')
    return "".join(parts)


sitemapindex = build_sitemap_index(glob.glob('*.xml'))

# "with" guarantees the handle is closed even if the write fails.
with open("map.xml", "w") as sitemap:
    sitemap.write(sitemapindex)
print(sitemapindex)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment