# makmac213/sitemap_status_crawler.py

## sitemap_status_crawler.py
import requests
from BeautifulSoup import BeautifulSoup

# Download the sitemap and parse it so that every <url> entry can be visited.
resp = requests.get('http://www.ofwguru.com/sitemap.xml')

soup = BeautifulSoup(resp.content)
urls = soup.findAll('url')

# Request each page listed in the sitemap and report its HTTP status code.
for url in urls:
    loc_tag = url.find('loc')
    # An entry without a usable <loc> has nothing to request; skip it rather
    # than crash on None.
    if loc_tag is None or not loc_tag.string:
        continue
    # Sitemap generators often pad the URL with whitespace/newlines.
    loc = loc_tag.string.strip()
    resp = requests.get(loc)
    # %-formatting prints "<url> <status>" identically on Python 2 and 3.
    print('%s %s' % (loc, resp.status_code))
    # TODO: log urls that are not 200 status