# jsyeo/GetODJ.py

## GetODJ.py
#!/usr/bin/python -tt
import urllib
from lxml import etree
import simplejson

def main():
    '''Scrape the ODJ devotional listing page and dump it to feed.json.

    Downloads the category page, selects every post's title, link and date
    with XPath, and writes one JSON object per line (keys "title", "url",
    "date") to "feed.json" in the current working directory.

    If the three selections do not contain the same number of items the
    page layout is not what the XPath expressions expect; in that case no
    file is written and a mismatch message is printed instead.
    '''
    f = urllib.urlopen("http://ymiblogging.org/category/devotional/odj/")
    try:
        body = f.read()
    finally:
        # Release the connection even when the read fails.
        f.close()
    html = etree.HTML(body)

    # xpath selection
    titles = html.xpath("//h3/a/text()")
    urls = [elem.attrib["href"] for elem in html.xpath("//h3/a")]
    dates = html.xpath("//div[@class='post-meta']/strong/text()")

    # Compare the counts by value. `is` tests object identity and only
    # happens to work for the small integers CPython caches.
    if not (len(titles) == len(urls) == len(dates)):
        print("HTML tag mis-match")
        return

    # The context manager guarantees the file is flushed and closed.
    with open("feed.json", "w") as json_file:
        # one JSON document per line, one line per post
        for title, url, date in zip(titles, urls, dates):
            odj_post = {"title": title, "url": url, "date": date}
            json_file.write(simplejson.dumps(odj_post))
            json_file.write("\n")
    print("JSON output written to feed.json")

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
	#!/usr/bin/python -tt
	import urllib
	from lxml import etree
	import simplejson

	def main():
	    '''Scrape the ODJ devotional listing page and dump it to feed.json.

	    Downloads the category page, selects every post's title, link and date
	    with XPath, and writes one JSON object per line (keys "title", "url",
	    "date") to "feed.json" in the current working directory.

	    If the three selections do not contain the same number of items the
	    page layout is not what the XPath expressions expect; in that case no
	    file is written and a mismatch message is printed instead.
	    '''
	    f = urllib.urlopen("http://ymiblogging.org/category/devotional/odj/")
	    try:
	        body = f.read()
	    finally:
	        # Release the connection even when the read fails.
	        f.close()
	    html = etree.HTML(body)

	    # xpath selection
	    titles = html.xpath("//h3/a/text()")
	    urls = [elem.attrib["href"] for elem in html.xpath("//h3/a")]
	    dates = html.xpath("//div[@class='post-meta']/strong/text()")

	    # Compare the counts by value. `is` tests object identity and only
	    # happens to work for the small integers CPython caches.
	    if not (len(titles) == len(urls) == len(dates)):
	        print("HTML tag mis-match")
	        return

	    # The context manager guarantees the file is flushed and closed.
	    with open("feed.json", "w") as json_file:
	        # one JSON document per line, one line per post
	        for title, url, date in zip(titles, urls, dates):
	            odj_post = {"title": title, "url": url, "date": date}
	            json_file.write(simplejson.dumps(odj_post))
	            json_file.write("\n")
	    print("JSON output written to feed.json")

	# Run the scraper only when this file is executed as a script, not on import.
	if __name__ == '__main__':
	    main()