Skip to content

Instantly share code, notes, and snippets.

@Gowee
Last active June 29, 2023 12:05
Show Gist options
  • Save Gowee/74a05e61c7bf71cc039c9272264f5603 to your computer and use it in GitHub Desktop.
Save Gowee/74a05e61c7bf71cc039c9272264f5603 to your computer and use it in GitHub Desktop.
Download PDF from read.nlc.cn
/**
 * Download a book from read.nlc.cn and hand it to the browser as a PDF file.
 *
 * Looks up the title and access keys with `getBookKey`, POSTs them to the
 * `getReader` endpoint, and saves the response body as `<title>.pdf` through
 * `saveByteArray`.
 *
 * @param {string} aid - Value sent as the `aid` query parameter.
 * @param {string} bid - Value sent as the `bid` query parameter (e.g. "8179.0").
 * @returns {Promise<void>} Settles once the save has been triggered.
 * @throws {Error} If the `getReader` endpoint answers with a non-2xx status.
 */
async function downloadBook(aid, bid) {
  const [title, myreader, kime, fime] = await getBookKey(aid, bid);
  const response = await fetch(
    `http://read.nlc.cn/menhu/OutOpenBook/getReader?aid=${aid}&bid=${bid}&kime=${kime}&fime=${fime}`,
    { method: "POST", headers: { myreader } },
  );
  // Without this check an upstream error page would be saved under a .pdf name.
  if (!response.ok) {
    throw new Error(`Failed to download book aid=${aid}, bid=${bid}: HTTP ${response.status}`);
  }
  const data = await response.arrayBuffer();
  saveByteArray(`${title}.pdf`, data);
}
/**
 * Load the reader page of a book and scrape the values needed to download it.
 *
 * @param {string} aid - Value sent as the `aid` query parameter.
 * @param {string} bid - Value sent as the `bid` query parameter.
 * @returns {Promise<string[]>} `[title, myreader, kime, fime]`, taken from the
 *   page's `var title = '...'`, `tokenKey="..."`, `timeKey="..."` and
 *   `timeFlag="..."` snippets respectively.
 * @throws {Error} If any of the four values cannot be found in the page.
 */
async function getBookKey(aid, bid) {
  const response = await fetch(`http://read.nlc.cn/OutOpenBook/OpenObjectBook?aid=${aid}&bid=${bid}`);
  const page = await response.text();

  // A bare `pattern.exec(page)[1]` dies with "Cannot read properties of null"
  // when the page layout differs; name the missing field instead.
  const extract = (name, pattern) => {
    const match = pattern.exec(page);
    if (match === null) {
      throw new Error(`Could not find ${name} in the reader page for aid=${aid}, bid=${bid}`);
    }
    return match[1];
  };

  const title = extract("title", /var title = '(.+?)'/);
  const myreader = extract("tokenKey", /tokenKey="(\w+)"/);
  const kime = extract("timeKey", /timeKey="(\w+)"/);
  const fime = extract("timeFlag", /timeFlag="(\w+)"/);
  return [title, myreader, kime, fime];
}
// https://stackoverflow.com/a/37340749/5488616
/**
 * Offer a byte buffer to the user as a PDF download.
 *
 * Wraps the bytes in an `application/pdf` Blob, points a temporary `<a>`
 * element at it and clicks that element programmatically.
 *
 * @param {string} reportName - File name suggested for the download.
 * @param {ArrayBuffer|ArrayBufferView|Blob|string} byte - Content of the file;
 *   passed as the single part of the Blob.
 * @returns {void}
 */
function saveByteArray(reportName, byte) {
  const blob = new Blob([byte], { type: "application/pdf" });
  const url = window.URL.createObjectURL(blob);
  const link = document.createElement("a");
  link.href = url;
  link.download = reportName;
  link.click();
  // The object URL keeps the blob alive until it is revoked. Release it after
  // a generous delay so the browser has certainly started reading it.
  setTimeout(() => window.URL.revokeObjectURL(url), 40 * 1000);
}
// To be executed at http://read.nlc.cn/ in the browser console.
// Report a failed download explicitly instead of leaving the promise floating.
downloadBook("403", "8179.0").catch((err) => {
  console.error("Download failed:", err);
});
// Project: throbbing-thunder-e93d

// Entry point: answer every "fetch" event with whatever handleRequest
// produces; if it rejects, reply with status 500 and the error's stack trace.
addEventListener("fetch", function onFetch(fetchEvent) {
  const toErrorResponse = (failure) => new Response(failure.stack, { status: 500 });
  const pending = handleRequest(fetchEvent.request).catch(toErrorResponse);
  fetchEvent.respondWith(pending);
});
/**
 * Serve the book addressed by the request path.
 *
 * The path is split on "/"; the third and fourth pieces (i.e. the second and
 * third path segments) are used as `aid` and `bid`. When either one is empty
 * or absent the reply is a 403 naming both values, otherwise the request is
 * delegated to `fetchBook`.
 *
 * Many more examples available at:
 * https://developers.cloudflare.com/workers/examples
 * @param {Request} request
 * @returns {Promise<Response>}
 */
async function handleRequest(request) {
  const segments = new URL(request.url).pathname.split("/");
  const aid = segments[2];
  const bid = segments[3];

  if (aid && bid) {
    console.log(aid, bid);
    return fetchBook(aid, bid);
  }
  return new Response(`Invalid arguments, aid=${aid}, bid=${bid}`, { status: 403 });
}
/**
 * Fetch the PDF of a book, preferring the direct file over the reader API.
 *
 * Uses `getBookInfo` to obtain the file path and access keys. If a path is
 * known, `http://read.nlc.cn/doc2/<pdfname>` is tried first and returned when
 * it answers 2xx; in every other case the `getReader` endpoint is POSTed with
 * the access keys and its response is returned as-is.
 *
 * @param {string} aid - Value sent as the `aid` query parameter.
 * @param {string} bid - Value sent as the `bid` query parameter.
 * @returns {Promise<Response>} The upstream response.
 */
async function fetchBook(aid, bid) {
  const [, pdfname, myreader, kime, fime] = await getBookInfo(aid, bid);
  console.log(pdfname);
  if (pdfname) {
    const direct = await fetch(`http://read.nlc.cn/doc2/${pdfname}`);
    if (direct.ok) {
      return direct;
    }
  }
  // NOTE(review): getBookInfo requests the reader page with `bid` + ".0" while
  // this URL sends `bid` unchanged - confirm the endpoint accepts both forms.
  return fetch(
    `http://read.nlc.cn/menhu/OutOpenBook/getReader?aid=${aid}&bid=${bid}&kime=${kime}&fime=${fime}`,
    { method: "POST", headers: { myreader } },
  );
}
/**
 * Load the reader page of a book and scrape its metadata and access keys.
 *
 * The page is requested with ".0" appended to `bid`.
 *
 * @param {string} aid - Value sent as the `aid` query parameter.
 * @param {string} bid - Book id; sent as `<bid>.0`.
 * @returns {Promise<Array<string|null>>} `[title, pdfname, myreader, kime, fime]`.
 *   `title` may be empty. `pdfname` is `null` when the page declares no
 *   non-empty `var pdfname = '...'`, so callers can fall back to another
 *   download route.
 * @throws {Error} If the page contains the upstream error marker, or if the
 *   title or one of the access keys cannot be found.
 */
async function getBookInfo(aid, bid) {
  const response = await fetch(`http://read.nlc.cn/OutOpenBook/OpenObjectBook?aid=${aid}&bid=${bid}.0`);
  const page = await response.text();
  if (page.includes("系统内部错误")) {
    throw Error("Upstream error, check the arguments or just retry");
  }

  // First capture group of `pattern` in the page, or null when it is absent.
  const capture = (pattern) => {
    const match = pattern.exec(page);
    return match === null ? null : match[1];
  };
  // Like `capture`, but a missing value is an error that names the field.
  const required = (name, pattern) => {
    const value = capture(pattern);
    if (value === null) {
      throw new Error(`Could not find ${name} in the reader page for aid=${aid}, bid=${bid}`);
    }
    return value;
  };

  const title = required("title", /var title = '(.*?)'/); // allow empty
  // Optional: callers test `if (pdfname)` and fall back when there is none.
  const pdfname = capture(/var pdfname\s*=\s*'([^']+)';/);
  const myreader = required("tokenKey", /tokenKey="(\w+)"/);
  const kime = required("timeKey", /timeKey="(\w+)"/);
  const fime = required("timeFlag", /timeFlag="(\w+)"/);
  return [title, pdfname, myreader, kime, fime];
}
// Project: throbbing-thunder-e93d

// Byte-size threshold/step named after its use for "dechunked" streaming.
const DECHUNK_SIZE = 2 ** 30; // 1 GiB

// Entry point: answer every "fetch" event with whatever handleRequest
// produces; if it rejects, reply with status 500 and the error's stack trace.
addEventListener("fetch", function onFetch(fetchEvent) {
  const toErrorResponse = (failure) => new Response(failure.stack, { status: 500 });
  const pending = handleRequest(fetchEvent.request).catch(toErrorResponse);
  fetchEvent.respondWith(pending);
});
/**
 * Route an incoming request to the matching download strategy.
 *
 * - `?file_path=<path>`: stream that upstream file through `fetchPath`; a 404
 *   is returned when `fetchPath` finds nothing.
 * - otherwise the path is split on "/" and its third and fourth pieces are
 *   used as `aid` and `bid` for `fetchBook`; a 403 is returned when either is
 *   empty or absent.
 *
 * Whether the client sent a `Range` header is forwarded to both helpers.
 *
 * Many more examples available at:
 * https://developers.cloudflare.com/workers/examples
 * @param {Request} request
 * @returns {Promise<Response>}
 */
async function handleRequest(request) {
  const ranging = Boolean(request.headers.get("range"));
  const { pathname, searchParams } = new URL(request.url);
  const pdfname = searchParams.get("file_path");
  if (pdfname) {
    const found = await fetchPath(pdfname, ranging);
    if (found) {
      return found;
    }
    // fetchPath resolves to undefined when no upstream location has the file;
    // that must not be handed on as if it were a Response.
    return new Response(`File not found: ${pdfname}`, { status: 404 });
  }
  const [, , aid, bid] = pathname.split("/");
  if (!aid || !bid) {
    return new Response(`Invalid arguments, aid=${aid}, bid=${bid}`, { status: 403 });
  }
  console.log(aid, bid, ranging);
  return fetchBook(aid, bid, ranging);
}
/**
 * Locate `file_path` under one of the upstream roots `/doc1/`, `/doc2/`,
 * `/doc3/` and stream it back.
 *
 * All three roots are probed with HEAD requests in parallel; the first one
 * (in doc1..doc3 order) answering 2xx is used. The file is then delivered in
 * one of two ways:
 *
 * - "direct": a single GET whose body is passed through. Used when the client
 *   sent a Range header, when the size is at most `DECHUNK_SIZE`, or when the
 *   upstream did not report a usable content-length.
 * - "dechunk": the file is requested in consecutive `Range` slices of
 *   `DECHUNK_SIZE` bytes that are piped one after another into a single
 *   fixed-length body.
 *
 * The response carries `x-stream-type` and `x-orig-url` headers (plus
 * `x-chunk-number` in dechunk mode).
 *
 * @param {string} file_path - Path below the `/docN/` root.
 * @param {boolean} ranging - Whether the client request carried a Range header.
 * @returns {Promise<Response|undefined>} The streaming response, or
 *   `undefined` when none of the roots has the file.
 */
async function fetchPath(file_path, ranging) {
  const probes = await Promise.all(
    [1, 2, 3].map((i) => fetch(`http://read.nlc.cn/doc${i}/${file_path}`, { method: "HEAD" })),
  );
  const effres = probes.find((resp) => resp.ok);
  if (!effres) {
    return undefined;
  }
  const cl = Number.parseInt(effres.headers.get("content-length"), 10);
  console.log("size:", cl);
  console.log(Object.fromEntries(effres.headers.entries()));
  // workers does not support Range?
  // A missing/invalid content-length cannot be sliced, so stream it directly.
  if (ranging || !Number.isFinite(cl) || cl <= DECHUNK_SIZE) {
    console.log("direct streaming");
    const upstream = await fetch(effres.url);
    // Re-wrap so the headers become mutable.
    const resp = new Response(upstream.body, upstream);
    resp.headers.set("x-stream-type", "direct");
    resp.headers.set("x-orig-url", effres.url);
    return resp;
  }

  console.log("dechunk streaming");
  // https://community.cloudflare.com/t/workers-pipe-a-sequential-series-of-streams-to-response-without-blocking/311695/7
  const { readable, writable } = new FixedLengthStream(cl);
  const chunk = Math.ceil(cl / DECHUNK_SIZE);
  const stream = async () => {
    for (let i = 0; i < chunk; i += 1) {
      const s = i * DECHUNK_SIZE;
      const e = Math.min((i + 1) * DECHUNK_SIZE, cl);
      const resp = await fetch(effres.url, { headers: { Range: `bytes=${s}-${e - 1}` } });
      // Anything but 206 means the slice is not what was asked for; piping it
      // would corrupt the output.
      if (resp.status !== 206) {
        throw new Error(`Range request bytes=${s}-${e - 1} answered with HTTP ${resp.status}`);
      }
      console.log(`piping bytes=${s}..${e} (=${resp.headers.get("content-length")}), preventing close: ${e < cl}`);
      // Keep the sink open after every slice; it is closed once, after the loop.
      await resp.body.pipeTo(writable, { preventClose: true });
    }
    console.log("closing");
    await writable.getWriter().close();
    console.log("closed");
  };
  // Deliberately not awaited: the response must be returned while the slices
  // are still being pumped. On failure, abort the sink so the client sees a
  // broken body instead of waiting forever.
  stream().catch((err) => {
    console.log("dechunk streaming failed:", err);
    return writable.abort(err).catch(() => {});
  });
  const resp = new Response(readable, effres);
  resp.headers.set("x-stream-type", "dechunk");
  resp.headers.set("x-orig-url", effres.url);
  resp.headers.set("x-chunk-number", String(chunk));
  return resp;
}
/**
 * Fetch the PDF of a book, preferring the direct file over the reader API.
 *
 * Book details come from `getBookInfo`. When they include a file path, that
 * path is handed to `fetchPath` and a truthy result is returned right away.
 * Otherwise (no path, or `fetchPath` found nothing) the `getReader` endpoint
 * is POSTed with the access keys and its response is returned.
 *
 * @param {string} aid - Value sent as the `aid` query parameter.
 * @param {string} bid - Value sent as the `bid` query parameter.
 * @param {boolean} ranging - Passed through to `fetchPath`.
 * @returns {Promise<Response>}
 */
async function fetchBook(aid, bid, ranging) {
  const [, pdfPath, token, timeKey, timeFlag] = await getBookInfo(aid, bid);
  console.log(pdfPath);

  const located = pdfPath ? await fetchPath(pdfPath, ranging) : undefined;
  if (located) {
    return located;
  }

  const readerUrl = `http://read.nlc.cn/menhu/OutOpenBook/getReader?aid=${aid}&bid=${bid}&kime=${timeKey}&fime=${timeFlag}`;
  return fetch(readerUrl, { method: "POST", headers: { myreader: token } });
}
/**
 * Load the reader page of a book and scrape its metadata and access keys.
 *
 * The page is requested with ".0" appended to `bid`.
 *
 * @param {string} aid - Value sent as the `aid` query parameter.
 * @param {string} bid - Book id; sent as `<bid>.0`.
 * @returns {Promise<Array<string|null>>} `[title, pdfname, myreader, kime, fime]`.
 *   `title` may be empty. `pdfname` is `null` when the page declares no
 *   non-empty `var pdfname = '...'`, so callers can fall back to another
 *   download route.
 * @throws {Error} If the page contains the upstream error marker, or if the
 *   title or one of the access keys cannot be found.
 */
async function getBookInfo(aid, bid) {
  const response = await fetch(`http://read.nlc.cn/OutOpenBook/OpenObjectBook?aid=${aid}&bid=${bid}.0`);
  const page = await response.text();
  if (page.includes("系统内部错误")) {
    throw Error("Upstream error, check the arguments or just retry");
  }

  // First capture group of `pattern` in the page, or null when it is absent.
  const capture = (pattern) => {
    const match = pattern.exec(page);
    return match === null ? null : match[1];
  };
  // Like `capture`, but a missing value is an error that names the field.
  const required = (name, pattern) => {
    const value = capture(pattern);
    if (value === null) {
      throw new Error(`Could not find ${name} in the reader page for aid=${aid}, bid=${bid}`);
    }
    return value;
  };

  const title = required("title", /var title = '(.*?)'/); // allow empty
  // Optional: callers test `if (pdfname)` and fall back when there is none.
  const pdfname = capture(/var pdfname\s*=\s*'([^']+)';/);
  const myreader = required("tokenKey", /tokenKey="(\w+)"/);
  const kime = required("timeKey", /timeKey="(\w+)"/);
  const fime = required("timeFlag", /timeFlag="(\w+)"/);
  return [title, pdfname, myreader, kime, fime];
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment