Skip to content

Instantly share code, notes, and snippets.

@jianyun8023
Last active March 3, 2024 15:01
Show Gist options
  • Save jianyun8023/33f144b93b68ac0fac6fbe8b9c4b6c7b to your computer and use it in GitHub Desktop.
Save jianyun8023/33f144b93b68ac0fac6fbe8b9c4b6c7b to your computer and use it in GitHub Desktop.
weread download,直接生成epub。仅用于技术研究。目前已失效
// ==UserScript==
// @name 微信读书下载(已失效)
// @namespace http://tampermonkey.net/
// @version 0.5.2
// @description 下载微信读书的书籍资源
// @author tang
// @match https://weread.qq.com/web/reader/*
// @grant unsafeWindow
// @grant GM_setValue
// @grant GM_getValue
// @grant GM_xmlhttpRequest
// @run-at document-idle
// @connect res.weread.qq.com
// @connect tencent-cloud.com
// @connect myqcloud.com
// @require https://cdn.bootcss.com/jszip/3.2.2/jszip.js
// @require https://cdn.bootcss.com/jquery/3.4.1/jquery.min.js
// @require https://unpkg.com/art-template/lib/template-web.js
// ==/UserScript==
(function () {
'use strict';
/**
 * Plain data holder for the book-level metadata that the EPUB templates read.
 *
 * The constructor stores the seven descriptive fields and starts with an empty
 * `images` list; `cpid`, `isbn` and `chapterList` only exist after the matching
 * setter has been called.
 */
class Ebook {
    constructor(id, title, author, intro, publisher, publishTime, maxLevel) {
        Object.assign(this, {
            id,
            title,
            author,
            intro,
            publisher,
            publishTime,
            maxLevel,
            images: [],
        });
    }

    /** Stores the content-provider id. */
    setCpid(cpid) {
        Object.assign(this, { cpid });
    }

    /** Stores the ISBN. */
    setIsbn(isbn) {
        Object.assign(this, { isbn });
    }

    /** Stores the array of top-level chapters. */
    setChapterList(chapterList) {
        Object.assign(this, { chapterList });
    }

    /** Replaces the list of image paths. */
    setImages(images) {
        Object.assign(this, { images });
    }
}
/**
 * One entry of the table of contents.
 *
 * Holds the chapter's id, file path, title, nesting level and play order, plus
 * the ordered list of chapters nested directly beneath it (`subChapter`).
 */
class Chapter {
    constructor(uid, path, title, level, playOrder) {
        Object.assign(this, { uid, path, title, level, playOrder, subChapter: [] });
    }

    /** Appends `chapter` as the last direct child. */
    addSubChapter(chapter) {
        const children = this.subChapter;
        children[children.length] = chapter;
    }

    /** Returns the most recently added direct child, or undefined when there is none. */
    getLastSubChapter() {
        const [last] = this.subChapter.slice(-1);
        return last;
    }
}
/**
 * Converts the reader's flat chapter list into the nested Ebook model that the
 * NCX / OPF / TOC templates render.
 *
 * Each entry of `book.chapterInfos` becomes a Chapter whose playOrder is its
 * 1-based position. A chapter of level N is attached beneath the most recent
 * chapter of level N-1. Two irregular cases are handled instead of failing:
 *  - if an expected ancestor is missing (e.g. the book opens with a level-2
 *    entry), the chapter is attached to the deepest ancestor that does exist;
 *  - levels deeper than four are placed at the fourth level, because the
 *    templates only render four levels and a chapter left out of the hierarchy
 *    would be missing from the manifest even though its file is in the archive.
 *
 * @param {object} book reader state exposing `chapterInfos` and `bookInfo`
 * @returns {Ebook} populated model; `maxLevel` is the deepest level actually used
 */
const buildEbook = book => {
    // Deepest nesting the templates can render.
    const MAX_DEPTH = 4
    const chapterList = []
    let maxLevel = 1
    book.chapterInfos.forEach((element, i) => {
        const chapter = new Chapter(element.chapterIdx, element.chapterIdx + ".html", element.title, element.level, i + 1)
        const wantedDepth = Math.min(Number(chapter.level) || 1, MAX_DEPTH)
        // Walk down the "last child" spine until the wanted depth is reached
        // or there is no ancestor left to descend into.
        let siblings = chapterList
        let depth = 1
        while (depth < wantedDepth && siblings.length > 0) {
            siblings = siblings[siblings.length - 1].subChapter
            depth++
        }
        siblings.push(chapter)
        if (depth > maxLevel) {
            maxLevel = depth
        }
    });
    const info = book.bookInfo
    const ebook = new Ebook(
        info.bookId,
        info.title,
        info.author,
        info.intro,
        info.publisher,
        info.publishTime,
        maxLevel
    )
    ebook.setChapterList(chapterList)
    ebook.setIsbn(info.isbn)
    ebook.setCpid(info.cpid)
    ebook.setImages(bookImages)
    return ebook
}
/**
 * Returns a promise that is fulfilled (with no value) after `ms` milliseconds.
 */
const sleep = ms => new Promise(wake => {
    setTimeout(wake, ms)
})
/**
 * Promise wrapper around a GM_xmlhttpRequest GET.
 *
 * @param {string} url      address to fetch
 * @param {object} headers  request headers passed through unchanged
 * @param {string} [type]   responseType; 'json' is used when omitted
 * @returns {Promise} fulfilled with `res.response` for 'blob' requests and with
 *          `res.response || res.responseText` otherwise; rejected with the
 *          value handed to `onerror`
 */
function get(url, headers, type) {
    return new Promise((resolve, reject) => {
        const onload = (res) => {
            // A 204 response aborts the request object before the promise settles.
            if (res.status === 204) {
                request.abort();
            }
            const payload = type === 'blob' ? res.response : (res.response || res.responseText);
            resolve(payload);
        };
        const onerror = (err) => {
            reject(err);
        };
        const request = GM_xmlhttpRequest({
            method: "GET",
            url,
            headers,
            responseType: type || 'json',
            onload,
            onerror,
        });
    });
}
/**
 * Offers `content` to the user as a file download named `fileName`.
 *
 * A temporary object URL is created for the blob, assigned to a detached
 * anchor element and "clicked". The same URL string is revoked afterwards so
 * the blob can be released; revocation is deferred because revoking in the
 * same tick as the click can cancel the download in some browsers.
 *
 * @param {string} fileName suggested name for the saved file
 * @param {Blob} content    data to download
 */
function createAndDownloadFile(fileName, content) {
    const REVOKE_DELAY_MS = 10000;
    const objectUrl = URL.createObjectURL(content);
    const aTag = document.createElement('a');
    aTag.download = fileName;
    aTag.href = objectUrl;
    aTag.click();
    // revokeObjectURL expects the URL string returned above, not the Blob.
    setTimeout(() => URL.revokeObjectURL(objectUrl), REVOKE_DELAY_MS);
}
// Fetches `url` through get() with no extra headers and responseType 'blob';
// returns the promise produced by get().
const imageUrlToBlob = url => get(url, {}, 'blob')
//var $ = unsafeWindow.$
// First element matching the reader-content selector, captured once when the
// script starts; its `__vue__` property is read later to obtain the book object.
// NOTE(review): undefined if the element is not in the DOM yet at this point.
var vue = $("div.readerContent.routerView")[0]
// Removes every ".readerChapterContent" occurrence from the stylesheet text.
const parseCss = function (cssText) {
    const scopeSelector = /\.readerChapterContent/g
    return cssText.replace(scopeSelector, "")
}
// Turns the first "</body>" found in the markup into "</div>".
const fixBody = function (failBody) {
    const repaired = failBody.replace("</body>", "</div>")
    return repaired
}
// Matches an unclosed <hr>, <img> or <br> tag together with its quoted
// attributes so that it can be rewritten in self-closing form.
// Attribute values are matched with [^"]* rather than a lazy .*? so the
// pattern can never backtrack across a closing quote and swallow the
// following tags (which happened when the tag was already self-closed).
const regex = /<(hr|img|br)((?:\s+[\w-]+="[^"]*")*)\s*>/gm;
// Replacement: same tag name and attributes, terminated with "/>".
const subst = `<$1$2/>`;
// art-template source for a single chapter page (XHTML 1.1).
// Rendered by buildHtml with { book, body }: the title comes from
// book.currentChapter.title, the stylesheet link points at ../css/flow.css,
// and `{{@ body }}` is art-template's raw-output form, so the chapter markup
// is inserted without HTML escaping.
const chapter_html_tpl = `<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="zh-CN" lang="zh-CN">
<head>
<title>{{ book.currentChapter.title }}</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
<link rel="stylesheet" type="text/css" href="../css/flow.css"/>
</head>
<body>
{{@ body }}
</body>
</html>`
/**
 * Produces the final XHTML text for one chapter.
 *
 * Renders the chapter template with the book object and the prepared body
 * markup, then rewrites unclosed hr / img / br tags into self-closing form.
 */
const buildHtml = (book, body) =>
    template
        .render(chapter_html_tpl, { book, body })
        .replace(regex, subst)
/**
 * Strips the reader's bookkeeping attributes from a jQuery-wrapped element.
 *
 * First removes the six `data-*` attributes, then clears the matching entries
 * from the element's jQuery data store, in the order listed below.
 */
function cleanAttr(element) {
    const attributeNames = ["data-wr-co", "data-wr-bd", "data-wr-id", "data-ratio", "data-w", "data-w-new"]
    const dataKeys = ["wr-co", "wr-bd", "wr-id", "ratio", "w", "w-new"]
    for (const attributeName of attributeNames) {
        element.removeAttr(attributeName)
    }
    for (const dataKey of dataKeys) {
        element.removeData(dataKey)
    }
}
/**
 * Reduces a jQuery-wrapped element to its plain text.
 *
 * The reader attributes are removed via cleanAttr and all child markup is
 * replaced by the element's text content. The text is written back with the
 * `.text()` setter rather than `.html()`: feeding plain text to `.html()`
 * re-parses it as markup, so literal "<" or "&" characters in the book text
 * could turn into elements or entities.
 *
 * @param {object} element jQuery-wrapped element to flatten
 */
function cleanTag(element) {
    cleanAttr(element)
    const plainText = element.text()
    element.text(plainText)
}
// Archive-relative paths of every image collected while chapters are processed.
const bookImages = []
/**
 * Shows a status message in the reader's tips banner: clears the banner's
 * inline style, then sets the HTML of its text element to `str`.
 */
const log = str => {
    const banner = $('.readerMemberCardTips')
    banner.attr("style", "")
    const bannerText = $('.readerMemberCardTips > .text')
    bannerText.html(str)
}
/**
 * Moves every remote image of a chapter into the archive.
 *
 * For each <img> whose `data-src` contains "http", the image is queued for
 * download into `img/<name>` inside `zip`, the element's `src` is pointed at
 * that file (relative to the chapter's folder), `data-src` is removed and the
 * path is recorded in `bookImages`. Names without a "." get ".jpg" appended.
 *
 * Images without a `data-src` attribute are skipped instead of throwing, and
 * an image that was already collected is neither downloaded nor recorded a
 * second time, so the manifest never lists the same file twice.
 *
 * @param {object} doc jQuery-wrapped chapter fragment
 * @param {object} zip JSZip instance receiving the image files
 * @returns {string} the fragment's inner HTML after rewriting
 */
const replaceImages = (doc, zip) => {
    doc.find("img")
        .each(function () {
            const img = $(this)
            const url = img.attr("data-src");
            console.log("处理图片 " + url)
            if (typeof url !== "string" || !url.includes("http")) return
            let imageName = url.slice(url.lastIndexOf("/") + 1)
            if (!imageName.includes(".")) imageName += ".jpg"
            const zipPath = "img/" + imageName
            if (!bookImages.includes(zipPath)) {
                // The promise is handed to JSZip as-is; it is awaited when the archive is generated.
                zip.file(zipPath, imageUrlToBlob(url))
                bookImages.push(zipPath)
            }
            img.attr("src", "../img/" + imageName)
            img.removeAttr("data-src")
        })
    return doc.html()
}
/**
 * Normalises a chapter fragment in place before it is written to the archive.
 *
 * - div, img and hr elements only lose the reader's bookkeeping attributes;
 * - h1, h2 and h3 elements are reduced to plain text;
 * - p elements are reduced to plain text unless they contain an image, in
 *   which case only their attributes are cleaned.
 */
const cleanHtml = doc => {
    const attributeOnly = ["div", "img", "hr"]
    const textOnly = ["h1", "h2", "h3"]
    for (const selector of attributeOnly) {
        doc.find(selector).each(function () {
            cleanAttr($(this))
        })
    }
    for (const selector of textOnly) {
        doc.find(selector).each(function () {
            cleanTag($(this))
        })
    }
    doc.find("p").each(function () {
        const holdsImage = $(this).find("img").length > 0
        const clean = holdsImage ? cleanAttr : cleanTag
        clean($(this))
    })
}
// art-template source for toc.ncx (the NCX navigation document).
// Rendered in addMetadata with { book: ebook }. The head carries book.id and
// book.maxLevel; the navMap walks book.chapterList and three further levels of
// subChapter, emitting one navPoint (id, playOrder, title, text/<path>) per
// chapter, so at most four nesting levels are rendered.
var tocncx = `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="zh-CN">
<head>
<meta name="dtb:uid" content="{{book.id}}"/>
<meta name="dtb:depth" content="{{book.maxLevel}}"/>
<meta name="dtb:totalPageCount" content="0"/>
<meta name="dtb:maxPageNumber" content="0"/>
</head>
<docTitle>
<text>{{ book.title }}</text>
</docTitle>
<docAuthor>
<text>{{ book.author }}</text>
</docAuthor>
<navMap>
{{each book.chapterList}}
<navPoint class="chapter" id="chapter_{{$value.uid}}" playOrder="{{$value.playOrder}}">
<navLabel>
<text>{{$value.title}}</text>
</navLabel>
<content src="text/{{$value.path}}"/>
{{each $value.subChapter}}
<navPoint class="chapter" id="chapter_{{$value.uid}}" playOrder="{{$value.playOrder}}">
<navLabel>
<text>{{$value.title}}</text>
</navLabel>
<content src="text/{{$value.path}}"/>
{{each $value.subChapter}}
<navPoint class="chapter" id="chapter_{{$value.uid}}" playOrder="{{$value.playOrder}}">
<navLabel>
<text>{{$value.title}}</text>
</navLabel>
<content src="text/{{$value.path}}"/>
{{each $value.subChapter}}
<navPoint class="chapter" id="chapter_{{$value.uid}}" playOrder="{{$value.playOrder}}">
<navLabel>
<text>{{$value.title}}</text>
</navLabel>
<content src="text/{{$value.path}}"/>
</navPoint>
{{/each}}
</navPoint>
{{/each}}
</navPoint>
{{/each}}
</navPoint>
{{/each}}
</navMap>
</ncx>`;
// art-template source for text/toc.html, the human-readable table of contents.
// Rendered in addMetadata with { book: ebook }. Top-level chapters become <h3>
// headings; deeper levels become nested lists (four levels in total).
// A nested <ul> is only emitted when the chapter really has children
// (`subChapter` is always an array, so its length is tested, not the array),
// and it is placed inside the parent's <li> so the lists are valid XHTML.
var tochtml = `<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Table of Contents</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
</head>
<body>
<h1><b>TABLE OF CONTENTS</b></h1>
<br/>
{{each book.chapterList}}
<h3><b><a href="{{$value.path}}">{{$value.title}}</a></b></h3>
{{if $value.subChapter.length}}
<ul>
{{each $value.subChapter}}
<li><a href="{{$value.path}}">{{$value.title}}</a>
{{if $value.subChapter.length}}
<ul>
{{each $value.subChapter}}
<li><a href="{{$value.path}}">{{$value.title}}</a>
{{if $value.subChapter.length}}
<ul>
{{each $value.subChapter}}
<li><a href="{{$value.path}}">{{$value.title}}</a></li>
{{/each}}
</ul>
{{/if}}
</li>
{{/each}}
</ul>
{{/if}}
</li>
{{/each}}
</ul>
{{/if}}
{{/each}}
</body>
</html>`;
// art-template source for content.opf (the OPF 2.0 package document).
// Rendered in addMetadata with { book: ebook }. Metadata entries for intro,
// publisher, date, ISBN and CPID are only written when the value is present.
// The manifest lists the fixed files, every chapter down to four levels and
// every collected image; the spine repeats the chapters in reading order.
// The fourth-level manifest item previously had its closing quote after the
// media-type attribute, which made the generated XML malformed.
var opf_tmp = `<?xml version="1.0" encoding="utf-8"?>
<package version="2.0" unique-identifier="BookId" xmlns="http://www.idpf.org/2007/opf">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:opf="http://www.idpf.org/2007/opf" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<dc:title>{{ book.title }}</dc:title>
<dc:language>zh-cn</dc:language>
<dc:creator>{{ book.author }}</dc:creator>
{{if book.intro}}
<dc:description>&lt;div&gt;
&lt;p&gt;{{book.intro}}&lt;/p&gt;&lt;/div&gt;
</dc:description>
{{/if}}
{{if book.publisher}}
<dc:publisher>{{book.publisher}}</dc:publisher>
{{/if}}
{{if book.publishTime}}
<dc:date>{{book.publishTime}}</dc:date>
{{/if}}
{{if book.isbn}}
<dc:identifier opf:scheme="ISBN">{{book.isbn}}</dc:identifier>
{{/if}}
{{if book.cpid}}
<dc:identifier opf:scheme="CPID">{{book.cpid}}</dc:identifier>
{{/if}}
<meta name="cover" content="cover"/>
</metadata>
<manifest>
<item id="cover" href="cover.jpg" media-type="image/jpeg"/>
<item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>
<item id="titlepage" href="titlepage.xhtml" media-type="application/xhtml+xml"/>
<item id="toc_html" href="text/toc.html" media-type="application/xhtml+xml"/>
{{each book.chapterList}}
<item id="chapter_{{$value.uid}}" href="text/{{$value.path}}" media-type="application/xhtml+xml"/>
{{each $value.subChapter}}
<item id="chapter_{{$value.uid}}" href="text/{{$value.path}}" media-type="application/xhtml+xml"/>
{{each $value.subChapter}}
<item id="chapter_{{$value.uid}}" href="text/{{$value.path}}" media-type="application/xhtml+xml"/>
{{each $value.subChapter}}
<item id="chapter_{{$value.uid}}" href="text/{{$value.path}}" media-type="application/xhtml+xml"/>
{{/each}}
{{/each}}
{{/each}}
{{/each}}
<item id="css" href="css/flow.css" media-type="text/css"/>
{{each book.images}}
<item id="image_{{$index}}" href="{{$value}}" media-type="image/jpeg"/>
{{/each}}
</manifest>
<spine toc="ncx">
<itemref idref="titlepage"/>
<itemref idref="toc_html"/>
{{each book.chapterList}}
<itemref idref="chapter_{{$value.uid}}"/>
{{each $value.subChapter}}
<itemref idref="chapter_{{$value.uid}}"/>
{{each $value.subChapter}}
<itemref idref="chapter_{{$value.uid}}"/>
{{each $value.subChapter}}
<itemref idref="chapter_{{$value.uid}}"/>
{{/each}}
{{/each}}
{{/each}}
{{/each}}
</spine>
<guide>
</guide>
</package>`;
// Static XHTML for the cover page, stored in the archive as titlepage.xhtml.
// It shows the cover image stretched to the page through an inline SVG.
// titlepage.xhtml and cover.jpg are both written to the archive root, so the
// image is referenced as "cover.jpg"; the former "../cover.jpg" pointed above
// the root of the container.
var cover_html_text = `<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
<title>Cover</title>
<style type="text/css" title="override_css"> @page {padding: 0pt; margin:0pt} body { text-align: center; padding:0pt; margin: 0pt; } </style>
</head>
<body>
<div>
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" width="100%" height="100%" viewBox="0 0 877 1283" preserveAspectRatio="none"><image width="877" height="1283" xlink:href="cover.jpg"/></svg>
</div>
</body>
</html>`;
// Static content of META-INF/container.xml (written in addMetadata).
// Its single rootfile entry names content.opf as the package document.
var containerStr = `<?xml version="1.0" encoding="UTF-8"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="content.opf" media-type="application/oebps-package+xml"/>
</rootfiles>
</container>`;
/**
 * Writes every non-chapter file of the EPUB into `zip`.
 *
 * Prints a Markdown-style outline of the chapters to the console, builds the
 * Ebook model and then stores, in this order: toc.ncx, css/flow.css,
 * titlepage.xhtml, text/toc.html, content.opf, mimetype and
 * META-INF/container.xml.
 */
var addMetadata = (book, zip) => {
    // Outline: the book title followed by one "#"-prefixed line per chapter.
    const outline = book.chapterInfos.reduce(
        (text, info) => text + "\n" + "#".repeat(info.level) + " " + info.title,
        book.bookInfo.title
    )
    log("addMetadata")
    console.log(outline)
    const ebook = buildEbook(book)
    const renderWithBook = source => template.render(source, { "book": ebook })
    zip.file("toc.ncx", renderWithBook(tocncx));
    zip.file("css/flow.css", parseCss(book.chapterContentStyles));
    zip.file("titlepage.xhtml", cover_html_text);
    zip.file("text/toc.html", renderWithBook(tochtml));
    zip.file("content.opf", renderWithBook(opf_tmp));
    zip.file("mimetype", "application/epub+zip");
    zip.file("META-INF/container.xml", containerStr)
}
/**
 * Stores the raw reader metadata in `zip` as JSON.
 *
 * Records the cover image address (read from the page's cover <img>) on
 * `book.bookInfo.cover`, then writes bookInfo.json and chapterInfos.json.
 */
var addInfo = (book, zip) => {
    log("addInfo")
    const coverSrc = $('img.wr_bookCover_img').attr("src")
    book.bookInfo.cover = coverSrc
    const jsonFiles = [
        ["bookInfo.json", book.bookInfo],
        ["chapterInfos.json", book.chapterInfos],
    ]
    for (const [fileName, payload] of jsonFiles) {
        zip.file(fileName, JSON.stringify(payload));
    }
}
/**
 * Queues the cover image for inclusion in `zip` as cover.jpg.
 *
 * The address is read from the page's cover <img>, remembered on
 * `book.bookInfo.cover`, and the pending download is handed to the archive.
 */
var addCover = (book, zip) => {
    log("addCover")
    const info = book.bookInfo
    info.cover = $('img.wr_bookCover_img').attr("src")
    const pendingCover = imageUrlToBlob(info.cover)
    zip.file("cover.jpg", pendingCover);
}
// Number of chapters written to the archive so far.
var count = 0
/**
 * Converts the chapter currently loaded in the reader and stores it in `zip`.
 *
 * Reports progress, wraps the chapter's EPUB fragments in a <div>, cleans the
 * markup, moves its images into the archive, renders the chapter page and
 * writes it to text/<chapterIdx>.html; finally bumps `count`.
 */
var addChapter = (book, zip) => {
    const current = book.currentChapter
    const progress = (count + 1) + "/" + book.chapterInfos.length
    log("正在下载数据 " + progress + " : " + current.title)
    const wrappedMarkup = '<div>' + book.chapterContentForEPub.join('') + '</div>'
    const fragment = $(fixBody(wrappedMarkup))
    cleanHtml(fragment)
    const pageHtml = buildHtml(book, replaceImages(fragment, zip))
    zip.file("text/" + book.currentChapter.chapterIdx + ".html", pageHtml);
    count++
}
/**
 * Drives the chapter-by-chapter download until the whole book is archived.
 *
 * While chapters remain, waits three seconds, asks the reader to load the next
 * chapter, stores it with addChapter and calls itself again. Once `count` has
 * reached the number of chapters, the metadata files are added, the archive is
 * generated as a blob, kept on `unsafeWindow.rawBook`, and a download link is
 * shown and clicked automatically.
 *
 * Failures while loading a chapter or generating the archive are reported in
 * the status banner and the console instead of stalling silently.
 *
 * @param {object} book reader state (chapterInfos, bookInfo, handleNextChapter)
 * @param {object} zip  JSZip instance being filled
 */
var download = (book, zip) => {
    const reportFailure = err => {
        console.error(err)
        log("下载失败: " + (err && err.message ? err.message : err))
    }
    if (count >= book.chapterInfos.length) {
        addMetadata(book, zip)
        console.log("生成epub文件")
        zip.generateAsync({ type: "blob" })
            .then(function (content) {
                unsafeWindow.rawBook = content
                log('已获取全部数据,点击<a href="javascript:" title="下载" class="click_download">下载</a>')
                $(".click_download").click(function () {
                    if (unsafeWindow.rawBook) {
                        createAndDownloadFile(book.bookInfo.title + ".epub", unsafeWindow.rawBook);
                    } else {
                        log("缺失文件,请重新下载")
                    }
                })
                // Start the download right away; the link stays for manual retries.
                $(".click_download").click()
            })
            .catch(reportFailure);
        return
    }
    // Pause between chapters before asking the reader for the next one.
    sleep(3000)
        .then(() => book.handleNextChapter())
        .then(() => {
            addChapter(book, zip)
            download(book, zip)
        })
        .catch(reportFailure)
}
// Entry point: five seconds after the script starts, attach a download button
// to the reader's toolbar and wire it to the EPUB export.
sleep(5000).then(() => {
    // `vue` was captured when the script started and may be undefined if the
    // reader had not been rendered yet, so look the element up again if needed.
    var readerElement = vue || $("div.readerContent.routerView")[0]
    var book = readerElement && readerElement.__vue__
    if (!book) {
        console.log("微信读书下载插件加载失败")
        return
    }
    unsafeWindow.book = book
    var downloadBtn = '<button title="下载" class="readerControls_item download1"><span class="icon" style="background-image: url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAYAAACqaXHeAAAABmJLR0QA/wD/AP+gvaeTAAABLUlEQVR4nO2aTQ6CMBSER08kt3GJrDwGSw+t4oYXY8PPa0WHtvMlXZAAnflKAgEAIYQQQtTKgTj3EGxTshwZk+4JCWAHYCMB7ABsJIAdgI0EsAOwkQB2ADYSwA7ARgLYAdhIADsAGwlgB2AjAewAbLYWcNr4fKw5kugBPAFcnfsPwfBwAfAAcItO92N6vIt4JcQKsPK2/24kNPgMNozb3cpxMQK6mTma5NQbE66O50rwCkg5N4XYoB4B2ZQ3YgKvCciuvOENviQg2/KGp8CcgOzLG2tFpgQUU95YKhQKKK68MXcPDwWkPEtkw9TqLo0iVj7EK6HI8saahKLLG3MSqihvhBKqKm+0AO7jaMlZaJzHIYTg8M3/uZ73eP8kqYu+C7ADsKlegBBCCCHq5QW/t89vbCrH2wAAAABJRU5ErkJggg==);"></span></button>';
    $('button.catalog').after(downloadBtn);
    $(".download1").click(function () {
        if (!book.isEPub) {
            alert("该书源非EPUB,暂不支持下载!")
            // Nothing can be exported for a non-EPUB source, so stop here.
            return
        }
        // Start every run from a clean state so a second click does not reuse
        // the chapter counter or the image list of the previous run.
        count = 0
        bookImages.length = 0
        var zip = new JSZip();
        // The EPUB container format expects "mimetype" to be the first entry
        // of the archive, so it is added before anything else.
        zip.file("mimetype", "application/epub+zip")
        unsafeWindow.$zip = zip
        // addInfo(book, zip) is intentionally not called: the raw JSON dumps are not part of the EPUB.
        addCover(book, zip)
        book.changeChapter({ 'chapterUid': book.chapterInfos[0]['chapterUid'] })
            .then(() => {
                addChapter(book, zip)
                download(book, zip)
            })
            .catch(err => {
                console.error(err)
                log("下载失败: " + (err && err.message ? err.message : err))
            })
    })
    console.log("微信读书下载插件已加载!")
    console.log(buildEbook(book))
})
})();
@jianyun8023
Copy link
Author

微信读书的网页逻辑又改了,需要调整下代码

@jianyun8023
Copy link
Author

jianyun8023 commented Jun 16, 2022

更新说明

0.5

  • 使用正则修复html封闭符简写问题
  • 增加封面到书籍第一页
  • 修复目录html路径错误问题

@jianyun8023
Copy link
Author

// TODO

  • 直接生成epub文件,不再生成kindlegen的中间文件。

@jianyun8023
Copy link
Author

已支持生成epub文件

@Ryan817
Copy link

Ryan817 commented Jun 17, 2022

大佬,请问有些页数比较多的书籍,执行到最后会报out of memory,有啥优化的方法么?

@Sipoon
Copy link

Sipoon commented Jun 18, 2022

提示暂不支持五级目录深度 5 ?
机智如我,直接把==4改成>=4就可以下载了

@jianyun8023
Copy link
Author

大佬,请问有些页数比较多的书籍,执行到最后会报out of memory,有啥优化的方法么?

目前我没什么办法。可能能用浏览器local storage API缓存数据。目前都是把数据缓存到了内存,包括数据中的图片。

@jianyun8023
Copy link
Author

提示暂不支持五级目录深度 5 ? 机智如我,直接把==4改成>=4就可以下载了

怎么会有这么奇怪的需求?一般书籍目录有 3级就可以了。

@Sipoon
Copy link

Sipoon commented Jun 20, 2022

还有个问题,有时导出来的书不全,只有每一章第一页的内容,不会自动加载“下一页”的内容去读取。。

@jianyun8023
Copy link
Author

还有个问题,有时导出来的书不全,只有每一章第一页的内容,不会自动加载“下一页”的内容去读取。。

这个你可以给我下书名,我看看什么情况。我遇到微信读书的每章,都会加载本章的全部内容。没遇到再分页的。

@jianyun8023
Copy link
Author

@Sipoon 更新下试试

@jianyun8023
Copy link
Author

推荐去Z-Library找找书,没必要都在微信读书上搞。最近没有时间排查这些问题。有空的话会完善一下。

@owlwang
Copy link

owlwang commented Jun 21, 2022

《编码:隐匿在计算机软硬件背后的语言》这本书好像没法正常下载

@taocwang
Copy link

这个脚本真是太牛了,但是遇到了@Sipoon 同样的问题,当书籍的某章有多页时,只会读取第一页,不会自动加载“下一页”的读取内容。希望大神解决,谢谢。
如地铁2033的第一章:https://weread.qq.com/web/reader/9aa32a00726d709d9aa9668

@jianyun8023
Copy link
Author

@taocwang @Sipoon 分页问题已经修复。

@jianyun8023
Copy link
Author

jianyun8023 commented Sep 20, 2022

新脚本已经修复html错误问题
下载的书在ibook中遇到章节格式错误,可以使用sigil打开一下,会提示是否修复html错误,点击确认修复,再保存就好了。

@taocwang
Copy link

谢谢更新,还有个小问题,页面中的注(点击注标识,弹出窗口显示的注)没能下载显示,不知能否解决😂,谢谢!
如这本小说里面的注:https://weread.qq.com/web/reader/675326e0813ab6ffcg019c10ka87322c014a87ff679a21ea

@jianyun8023
Copy link
Author

谢谢更新,还有个小问题,页面中的注(点击注标识,弹出窗口显示的注)没能下载显示,不知能否解决😂,谢谢! 如这本小说里面的注:https://weread.qq.com/web/reader/675326e0813ab6ffcg019c10ka87322c014a87ff679a21ea

暂时没有计划支持注脚。

@taocwang
Copy link

taocwang commented Nov 2, 2022

似乎失效了😭

@zhl111
Copy link

zhl111 commented Jan 14, 2023

请问我点击下载没有任何反应呢?

@jianyun8023
Copy link
Author

目前失效了,我研究过新的方式,并不能解决。so,该放弃这个脚本了。

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment