bitchwhocodes/gist:9102400

## gistfile1.txt
import nltk
import epub
# Open the EPUB and print the tag-stripped text of every manifest item whose
# href contains both 'html' and 'chap' (presumably the chapter documents).
book = epub.open_epub('design-is-a-job.epub')

try:
    for item in book.opf.manifest.values():
        # Skip everything that is not an HTML document with 'chap' in its
        # href, before paying the cost of reading it out of the archive.
        if 'html' not in item.href or 'chap' not in item.href:
            continue

        # Read the raw content of the selected item.
        data = book.read_item(item)

        # Parenthesised single-argument print works on both Python 2 and 3.
        print(item.href)

        # NOTE(review): nltk.clean_html is only implemented in NLTK 2.x;
        # NLTK 3.x raises NotImplementedError and points to BeautifulSoup's
        # get_text() instead -- confirm the installed NLTK version.
        raw = nltk.clean_html(data)
        print(raw)
finally:
    # Release the underlying archive handle even if an item fails.
    book.close()