Created
April 24, 2014 01:18
-
-
Save zhuowei/11238227 to your computer and use it in GitHub Desktop.
How real programmers extract text from ePubs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html>
<html>
<head>
<title>Convert!</title>
<script>
"use strict";

// Text accumulated so far from every document loaded into the iframe.
var output = "";
// Position in fileNames of the next file to load.
var index = 0;
// The <iframe id="derp"> element; assigned by loadHandler after the page loads.
var frame;
// The <textarea id="out"> element; assigned by loadHandler. Declared
// explicitly so the strict-mode assignment there does not depend on the
// browser exposing the element as a named property of window.
var out;
/**
 * Load handler for the iframe.
 *
 * Appends the text content of the document currently shown in the iframe to
 * `output`, then either points the iframe at the next entry of `fileNames`
 * (which triggers this handler again) or, once the list is exhausted, writes
 * the accumulated text into the `out` element.
 */
function frameLoad() {
  output += frame.contentWindow.document.body.textContent;

  // Take the next file name and advance the cursor in one step.
  var upcoming = fileNames[index];
  index += 1;

  // Reading past the end of fileNames yields undefined: nothing left to load.
  if (!upcoming) {
    out.value = output;
    return;
  }

  frame.src = upcoming;
}
/**
 * Page load handler: looks up the iframe ("derp") and the output element
 * ("out"), installs frameLoad as the iframe's load handler, and starts the
 * chain by pointing the iframe at the first entry of `fileNames`.
 */
function loadHandler() {
  frame = document.getElementById("derp");
  out = document.getElementById("out");
  frame.onload = frameLoad;

  // Kick off the first load; frameLoad requests each subsequent file.
  var firstName = fileNames[index];
  index += 1;
  frame.src = firstName;
}

window.onload = loadHandler;
// Files loaded into the iframe, in this order: "body.xhtml", "contents.xhtml",
// then "body1.xhtml" through "body33.xhtml". The cover page entry is left
// commented out, so it is not loaded.
var fileNames = (function () {
  var names = [
    //"cover.xhtml",
    "body.xhtml",
    "contents.xhtml"
  ];
  for (var chapter = 1; chapter <= 33; chapter++) {
    names.push("body" + chapter + ".xhtml");
  }
  return names;
})();
</script>
</head>
<body>
<iframe id="derp"></iframe>
<textarea id="out"></textarea>
</body>
</html>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment