Skip to content

Instantly share code, notes, and snippets.

@GHolk
Last active November 26, 2022 18:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save GHolk/1d125debefe6714d186190df421c7eb4 to your computer and use it in GitHub Desktop.
Save GHolk/1d125debefe6714d186190df421c7eb4 to your computer and use it in GitHub Desktop.
// Helper object for batch-downloading Google-cache copies of wiki pages.
// It is reachable both as the local `downloadJob` and as `tri.downloadJob`
// (`tri` is presumably Tridactyl's global object - confirm against the host).
var downloadJob = tri.downloadJob = {
// WebExtension API handle: `tri.browserBg` when it is truthy, otherwise the global `browser`.
browser: tri.browserBg || browser,
// fileExist() compares this against 'windows' to choose which shell command to run.
os: 'windows',
// When true, downloadFromAnchor()/downloadFromPath() call fileExist() and skip files already present.
native: false,
// Directory prefix prepended to every file name handed to downloads.download().
downloadPath: 'kwk',
// Set to true to make the dlax()/dlap() loops exit before starting their next entry.
stop: false,
// Shorthand for `tri.excmds`; the methods below use its sleep, shellescape, exclaim_quiet and getclip.
ex: tri.excmds,
// List of pending download jobs; not read by any reachable code in this object.
idList: [],
dlhandlerwrap(...args) {
return tri.downloadJob.dlhandler(...args)
},
// Download event handler that dlhandlerwrap() forwards to. Currently a no-op.
dlhandler() {
},
async init() {
const tab = await this.browser.tabs.query({active: true, currentWindow: true})
this.tabId = tab[0].id
this.windowId = tab[0].windowId
return
this.dlhandle = function (e) {
const i = this.idList.findIndex(x => x.id == e.id)
if (i == -1) return
const job = this.idList[i]
let state
if (typeof e.state == 'object') state = e.state.current
else state = e.state
switch (e.state.current) {
case 'complete':
job.resolve()
case 'interrupted':
let error = new Error('download fail')
error.codeName = 'download-error'
job.reject(error)
this.idList.splice(i, 1)
default:
true
}
}
this.browser.downloads.onChanged.addListener(this.dlhandlerwrap)
this.browser.downloads.onCreated.addListener(this.dlhandlerwrap)
},
destroy() {
this.browser.downloads.onChanged.removeListener(this.dlhandlerwrap)
this.browser.downloads.onCreated.removeListener(this.dlhandlerwrap)
},
async download(url, file) {
let id;
try {
id = await this.browser.downloads.download({
filename: file,
url: url
});
} catch (fail) {
fail.codeName = 'download-error'
throw fail;
}
return {
id: id,
url: url,
file: file
}
},
getTitle(url) {
const path = url.pathname;
let title;
if (path.match(/index.php/)) {
title = url.searchParams.get("title");
} else title = decodeURIComponent(url.pathname.slice(1));
return title.replace(/[*"'.?/:]/g, c => "%" + c.charCodeAt().toString(16).toUpperCase());
},
async downloadFromAnchor(a) {
const url = new URL(a.href);
const title = this.getTitle(url);
const file = title + ".html";
const cacheNode = a.nextElementSibling.querySelector('a[href^="https://webcache"]');
if (!cacheNode) return 'no-cache';
if (this.native) {
if (await this.fileExist(file)) return 'skip'
}
const cache = cacheNode.href;
return await this.download(cache, `${this.downloadPath}/${file}`);
},
downloadCurrentHtml() {
function doctypeToString(node = document.doctype) {
if (!node) return ''
return '<!DOCTYPE ' + node.name
+ (node.publicId ? ` PUBLIC "${node.publicId}"` : '')
+ (!node.publicId && node.systemId ? ' SYSTEM' : '')
+ (node.systemId ? ` "${node.systemId}"` : '')
+ '>' + '\n'
}
function cleanCopy(root) {
const deep = true
const copy = root.cloneNode(deep)
copy.querySelectorAll('iframe[src ^= moz-extension')
.forEach(e => e.remove())
fixRelativeUrl(copy)
fixEncode(copy)
return copy
}
function fixEncode(root) {
if (document.characterSet == 'UTF-8') return
const list = root.querySelectorAll(
'meta[http-equiv=content-type],' +
'meta[http-equiv=Content-Type],' +
'meta[charset]'
)
if (list.length == 0) {
if (!confirm('not UTF-8 and no charset tag found, add one?')) return
const encodeNode = document.createElement('meta')
encodeNode.setAttribute('charset', 'utf-8')
encodeNode.dataset.gholkOriginalCharset = ''
const head = root.querySelector('head')
if (head) head.prepend(encodeNode)
else root.prepend(encodeNode)
}
else {
list.forEach(encodeNode => {
if (encodeNode.hasAttribute('charset')) {
const original = encodeNode.getAttribute('charset')
encodeNode.dataset.gholkOriginalCharset = original
encodeNode.setAttribute('charset', 'utf-8')
}
else if (encodeNode.hasAttribute('http-equiv')) {
encodeNode.dataset.gholkOriginalContentType = encodeNode.content
encodeNode.content = 'text/html; charset=UTF-8'
}
else alert(`unknown error while fix encode node: ${encodeNode.outerHTML}`)
})
}
}
function fixRelativeUrl(root) {
let base = root.querySelector('base')
if (base) {
const relative = base.getAttribute('href')
base.dataset.gholkOriginalHref = relative
base.setAttribute('href', base.href)
}
else {
base = document.createElement('base')
base.href = root.baseURI
base.dataset.gholkOriginalHref = ''
let head = root.querySelector('head')
if (head) head.prepend(base)
else root.prepend(base)
}
/*
copy.querySelectorAll('[href], [src]').forEach(e => {
const abs = /^\w+:\/\//
if ('href' in e && !abs.test(e.getAttribute('href'))) {
e.setAttribute('href', e.href)
}
else if ('src' in e && !abs.test(e.getAttribute('src'))) {
e.setAttribute('src', e.src)
}
})
*/
}
const html = doctypeToString() + cleanCopy(document.documentElement).outerHTML
const blob = new Blob([html], {type: 'text/html'})
const download = document.createElement('a')
download.download = document.title + '.html'
download.href = URL.createObjectURL(blob)
document.body.appendChild(download)
download.click()
download.remove()
URL.revokeObjectURL(blob)
},
async downloadFromPath(path) {
const title = path.slice(1)
const url = `https://wiki.komica.org/${title}`
const cache = 'http://webcache.googleusercontent.com/search?q=cache:' + url
this.cacheUrl = cache
const file = title.replace(/[*"'.?/:]/g, c => encodeURIComponent(c)) + '.html'
if (this.native && await this.fileExist(file)) return 'skip';
return await this.download(cache, this.downloadPath + '/' + file)
},
async fileExist(file) {
const ex = this.ex
const safe = await ex.shellescape(file)
let exist
if (this.os == 'windows') {
exist = await ex.exclaim_quiet(`if exist ..\\Downloads\\${this.downloadPath}\\${safe} echo exist`)
}
else exist = await ex.exclaim_quiet(`test -f $HOME/Downloads/${this.downloadPath}/${safe} && echo -n exist`);
return Boolean(exist)
},
async queryIdList(list) {
const ql = list.map(id => this.browser.downloads.search({id}))
return (await Promise.all(ql)).flat()
},
async dlax(method, n = 0) {
let l
let backupGoogleSearch = false
if (method == 'downloadFromPath') {
l = tri.state.komica_wiki_data
await this.init()
}
else if (method == 'downloadFromAnchor') {
l = $all('a>h3').map(h => h.parentNode)
}
else throw new Error('unknown method')
for (let i=n; i<l.length && !this.stop; true) {
console.log('line ' + i)
let errorDownload
let dl
try {
dl = await this[method](l[i])
}
catch (error) {
if (error.codeName != 'download-error') throw error
errorDownload = error
}
if (dl == 'skip') {
console.log('skip')
i++
continue
}
if (dl == 'no-cache') {
backupGoogleSearch = true
i++
continue
}
let retry = false
while (true) {
const state = (await this.browser.downloads.search({id: dl.id}))[0]
if (state.state == 'complete') {
await this.ex.sleep(3000)
break
}
if (state.state == 'in_progress') {
await this.ex.sleep(500)
continue
}
const tabp = await this.tabopen(dl.url)
const tab = await tabp
const is404p = this.check404(tab.id)
const first = await Promise.race([
is404p,
this.sleep(3).then(x => 'timeout')
])
if (first == 'error-404') {
await this.browser.tabs.remove(tab.id)
backupGoogleSearch = true
}
if (first != 'timeout') break
retry = await this.tabEval(
`confirm('retry ${i}? (or skip)')`
)
if (!retry) {
try {
await this.browser.tabs.remove(tab.id)
}
catch (error) {
console.error(error)
console.log('tab is already closed')
}
}
break
}
if (!retry) i++
}
if (backupGoogleSearch) {
this.downloadCurrentHtml()
await this.sleep(1)
}
if (method == 'downloadFromAnchor') {
if (confirm(`next?`)) $("#pnnext").click();
}
},
async dlap(n = 0) {
await this.init()
const l = tri.state.komica_wiki_data
for (let i=n; i<l.length && !this.stop; true) {
console.log('line ' + i)
let errorDownload
let dl
try {
dl = await this.downloadFromPath(l[i])
}
catch (error) {
if (error.codeName != 'download-error') throw error
errorDownload = error
}
if (dl == 'skip') {
console.log('skip')
i++
continue
}
let retry = false
while (true) {
const state = (await this.browser.downloads.search({id: dl.id}))[0]
if (state.state == 'complete') {
await this.ex.sleep(3000)
break
}
if (state.state == 'in_progress') {
await this.ex.sleep(500)
continue
}
const tabp = await this.tabopen(dl.url)
const tab = await tabp
const is404p = this.check404(tab.id)
const first = await Promise.race([
is404p,
this.sleep(3).then(x => 'timeout')
])
if (first == 'error-404') {
await this.browser.tabs.remove(tab.id)
}
if (first != 'timeout') break
retry = await this.tabEval(
`confirm('retry ${i}? (or skip)')`
)
if (!retry) {
try {
await this.browser.tabs.remove(tab.id)
}
catch (error) {
console.error(error)
console.log('tab is already closed')
}
}
break
}
if (!retry) i++
}
},
sleep(s) {
return new Promise(wake => setTimeout(wake, s*1000))
},
async check404(tabId) {
while (true) {
let tab
try {
tab = await this.browser.tabs.get(tabId)
}
catch (closed) {
return 'close'
}
if (tab.title.match(/^Error 404/)) return 'error-404'
await this.ex.sleep(200)
}
},
async tabEval(code) {
if (tri.browserBg) return eval(code)
const resultList = await this.browser.tabs.executeScript(
this.tabId,
{code}
)
return resultList[0]
},
async tabopen(url) {
return await this.browser.tabs.create({
active: true,
url: url,
windowId: this.windowId
})
},
async dlan(list = $all("a>h3")) {
for (const h of list) {
await this.downloadFromAnchor(h.parentNode) &&
await tri.excmds.sleep(5e3);
}
if (confirm("next?")) $("#pnnext").click();
},
Defer: function Defer() {
this.promise = new Promise((resolve, reject) => {
this.resolve = resolve
this.reject = reject
})
},
async readListFromClipboardToState(x) {
let l
if (!x) x = await this.ex.getclip('clipboard')
if (typeof x == 'string') l = x.split('\n')
else l = x
tri.state.komica_wiki_data = l
}
};
// eb tri.downloadJob.stop = true
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment