# mpevner/cat_import.py

## cat_import.py
from lxml.etree import parse

# Loads the Project Gutenberg RDF catalog and collects its book ("etext")
# and file records into module-level lists for interactive exploration.

# Namespace prefix (Clark notation) shared by the Gutenberg RDF term tags.
_PGTERMS_NS = '{http://www.gutenberg.org/rdfterms/}'

# 'catalog.rdf' is resolved relative to the current working directory and is
# parsed once, when this module is run or imported.
catalog = parse('catalog.rdf')

# findall() with a plain tag name searches the direct children of the
# document root, so these are the top-level etext / file records.
book_tag = _PGTERMS_NS + 'etext'
books = catalog.findall(book_tag)
file_tag = _PGTERMS_NS + 'file'
files = catalog.findall(file_tag)

# Usage notes (i is an index into the corresponding list):
#   books[i].getchildren()  -> sub-elements of one book record, i.e. the book data
#                              (getchildren() is deprecated in lxml; list(books[i])
#                              is the equivalent modern spelling)
#   books[i].values()       -> attribute values of the record; holds the book id
#   files[i].values()       -> attribute values of the record; holds the filename
#   files[i].getchildren()  -> includes an isFormatOf element whose .values()
#                              names the etext the file belongs to
#   e.g. files[i].getchildren()[4].values() gives '#etext1'
#   NOTE(review): the child index 4 comes from the original notes and
#   presumably depends on the record layout -- verify against the data.