Skip to content

Instantly share code, notes, and snippets.

@julsam
Created May 19, 2011 15:03
Show Gist options
  • Save julsam/980958 to your computer and use it in GitHub Desktop.
Save julsam/980958 to your computer and use it in GitHub Desktop.
Google profiles
import sys
import os.path
import xml.dom.minidom
import urllib2
# Download every URL listed in the <loc> elements of "profiles-sitemap.xml"
# into the local "data" directory, one output file per URL. Each file is named
# after the last path component of its URL.

# Open the sitemap; abort with a short message if it cannot be read.
try:
    f = open("profiles-sitemap.xml")
except IOError:
    sys.exit("Error opening file")

# Parse it into a DOM tree. The `finally` clause guarantees the file handle is
# released even when parsing fails and we exit.
try:
    doc = xml.dom.minidom.parse(f)
except Exception:
    sys.exit("Error processing XML file")
finally:
    f.close()

# root
root_element = doc.documentElement
print(root_element.nodeName)

# Make sure the output directory exists before the first download is written.
if not os.path.isdir("data"):
    os.makedirs("data")

for link in root_element.getElementsByTagName("loc"):
    node = link.firstChild
    # An empty <loc/> has no child node at all, and a whitespace-only text
    # node does not hold a URL; skip both instead of crashing on them.
    url = node.data.strip() if node is not None else ""
    if not url:
        continue
    print(url)

    # The output file is named after the last path component of the URL.
    # A URL that ends in "/" has no such component, so it cannot be saved.
    name = os.path.split(url)[1]
    if not name:
        sys.stderr.write("Skipping URL without a file name: %s\n" % url)
        continue

    # Fetch first and open the output file afterwards, so that a failed
    # download does not leave an empty file behind.
    response = urllib2.urlopen(url)
    try:
        payload = response.read()
    finally:
        response.close()

    # Binary mode: the payload is written exactly as received, without any
    # platform newline translation.
    with open(os.path.join("data", name), "wb") as out:
        out.write(payload)

doc.unlink()  # remove DOM tree from memory
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment