Skip to content

Instantly share code, notes, and snippets.

@zhuowei
Created April 24, 2014 01:18
Show Gist options
  • Save zhuowei/11238227 to your computer and use it in GitHub Desktop.
Save zhuowei/11238227 to your computer and use it in GitHub Desktop.
How real programmers extract text from ePubs
<!DOCTYPE html>
<html>
<head>
<title>Convert!</title>
<script>
"use strict";
// Shared state for the sequential extraction chain (loadHandler -> frameLoad).
var output = ""; // text accumulated from every document loaded so far
var index = 0; // position in fileNames of the next document to load
var frame; // the <iframe id="derp"> element, assigned in loadHandler
// Declared here so the strict-mode assignment in loadHandler does not depend
// on the browser exposing the id="out" element as a window property.
var out; // the <textarea id="out"> element that receives the final text
/**
 * Load handler for the extraction iframe.
 *
 * Appends the text of the document that just finished loading to `output`,
 * then either points the iframe at the next entry of `fileNames` or, once the
 * list is exhausted, writes the accumulated text into the `out` element.
 */
function frameLoad() {
  var doc = frame.contentWindow.document;
  // document.body is null when the loaded document's root is not <html>
  // (e.g. plain XML). Fall back to the root element, and finally to no text,
  // so a single such file cannot throw and abort the whole chain.
  var root = doc.body || doc.documentElement;
  output += root ? root.textContent : "";
  // `index` always points at the next file to load; reading past the end of
  // the list yields undefined, which ends the chain.
  var nextName = fileNames[index++];
  if (nextName) {
    frame.src = nextName;
  } else {
    out.value = output;
  }
}
/**
 * Window load handler that starts the extraction.
 *
 * Looks up the iframe (id "derp") and the output element (id "out"),
 * registers frameLoad as the iframe's load handler, and starts the chain by
 * pointing the iframe at the first entry of `fileNames`.
 */
function loadHandler() {
  frame = document.getElementById("derp");
  // Assign through `window` so this works whether or not `out` is declared as
  // a global variable: a bare assignment to an undeclared identifier throws a
  // ReferenceError in strict mode.
  window.out = document.getElementById("out");
  // The handler is attached before src is set, so the load triggered by the
  // first file is the first one frameLoad sees.
  frame.onload = frameLoad;
  frame.src = fileNames[index++];
}
// Start extraction only once the page has loaded: this script sits in <head>,
// so the elements loadHandler looks up by id do not exist yet at this point.
window.onload = loadHandler;
// Ordered list of documents to extract: the two fixed front files, followed
// by the numbered chapters body1.xhtml through body33.xhtml.
// "cover.xhtml" is excluded from the list.
var fileNames = (function () {
  var names = ["body.xhtml", "contents.xhtml"];
  for (var chapter = 1; chapter <= 33; chapter++) {
    names.push("body" + chapter + ".xhtml");
  }
  return names;
})();
</script>
</head>
<body>
<iframe id="derp"></iframe>
<textarea id="out"></textarea>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment