Skip to content

Instantly share code, notes, and snippets.

@iamtakingiteasy
Last active December 2, 2020 23:38
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save iamtakingiteasy/dbab385394975c4e43993f5f482f781f to your computer and use it in GitHub Desktop.
Save iamtakingiteasy/dbab385394975c4e43993f5f482f781f to your computer and use it in GitHub Desktop.
Sankaku grabber
#!/bin/sh
# Sankaku grabber: downloads every post matching the given tags from
# <BOORU>.sankakucomplex.com into the current directory.
# Usage: BOORU=(idol|chan) script <tags>

# Join the arguments into a single "+"-separated query value. "$*" is quoted
# so that tags containing glob characters (for example a "*" wildcard) are not
# expanded against files in the current directory, and printf is used because
# echo would swallow an option-like first argument such as -n.
tags="$(printf '%s' "$*" | tr ' ' '+')"
# Site to query; falls back to "idol" when BOORU is unset or empty.
BOORU=${BOORU:-idol}
# No tags at all, or anything containing "--" (e.g. --help): print usage, stop.
case "$tags" in
    ''|*--*) echo "BOORU=(idol|chan) $0 <tags>"; exit ;;
esac
awk -v "tags=$tags" -v "booru=$BOORU" '
# Write a final message to stdout and end the awk program.
#   message - text to print; a newline is appended, so "" yields a blank line
# Does not return to the caller.
function doexit(message) {
    print message
    exit
}
# Return the text in instr matched by reg, minus its first off characters.
#   instr - string to search
#   reg   - regular expression given as a string (dynamic regex)
#   off   - number of leading characters of the match to drop (the fixed
#           prefix of the pattern, e.g. 9 for "HTTP/1.1 ")
# Returns "" when reg does not match.
function extract(instr, reg, off) {
    if (!match(instr, reg))
        return ""
    # match() has set RSTART/RLENGTH; move the start forward by off and
    # shorten the length by the same amount.
    return substr(instr, RSTART + off, RLENGTH - off)
}
# Sleep for the given number of seconds, showing a one-line countdown, and
# return the delay to use next time.
#   seconds - how long to wait
# Returns seconds * 2, so a caller that stores the result backs off
# exponentially on repeated calls.
# Ends the program through doexit("") if a sleep is interrupted (non-zero
# status from the sleep command).
# The loop is driven by the seconds parameter rather than by a global, and
# rem is a function-local counter (extra parameter idiom).
function timeout(seconds,    rem) {
    for (rem = 1; rem <= seconds; rem++) {
        # \r redraws the same terminal line; the width of the counter is the
        # number of digits in seconds so the text does not jitter.
        printf("\rWent too fast, sleeping for %" length(seconds) "d/%d seconds...", rem, seconds)
        if (system("sleep 1"))
            doexit("")
    }
    print("")
    return seconds * 2
}
# Return the size of a file in bytes, as a number, using wc -c.
#   ofile - path of the file to measure
# Returns 0 when wc fails (for example because the file does not exist).
#
# cmd, line and quoted are function-local (extra parameter idiom). Keeping cmd
# local matters: a caller may be in the middle of reading from its own command
# pipe held in a variable of the same name.
function size(ofile,    cmd, line, quoted) {
    # The awk program lives inside a shell single-quoted string, so a single
    # quote is written as \047. Wrapping the name in single quotes (with any
    # embedded quote escaped) keeps the shell from expanding $, backticks or
    # backslashes that a scraped file name might contain.
    quoted = ofile
    gsub("\047", "\047\\\\\047\047", quoted)
    cmd = "wc -c \047" quoted "\047 2>&1"
    line = ""
    cmd | getline line
    if (close(cmd))
        return 0
    # Numeric conversion takes the leading number and skips leading blanks,
    # which some wc implementations emit before the count.
    return line + 0
}
# Format a byte count as a right-aligned, 6-character human-readable string
# such as "   1KB" or " 123B ".
#   value - number of bytes; may be a number or a numeric string
# Returns the value scaled by powers of 1024 and rounded with %.0f, followed
# by a two-character unit (B, KB, MB, GB or TB).
#
# value is forced to a number first: a string argument (e.g. the result of
# sprintf or substr) would otherwise be compared with 1024 as a string, which
# mis-scales values such as "100000" or "123". Scaling stops at TB so the
# unit lookup never runs past the table. sfx and x are function-local.
function human(value,    sfx, x) {
    split("B ,KB,MB,GB,TB", sfx, ",")
    value += 0
    for (x = 1; value >= 1024 && x < 5; x++)
        value /= 1024
    return sprintf("%6s", sprintf("%.0f%s", value, sfx[x]))
}
# Poll a file that is being downloaded and draw a one-line progress display
# until it has reached the expected size, then replace the line with
# "complete".
#   ofile   - path of the file being written
#   content - expected final size in bytes
# Ends the program through doexit("") if a sleep is interrupted.
#
# Differences from a naive implementation, all deliberate:
#   - sizes are forced to numbers so comparisons are numeric, not textual;
#   - completion is "have >= content", so a file that ends up larger than
#     announced does not spin forever;
#   - an announced size of 0 is shown as 100% instead of dividing by zero;
#   - all loop state is function-local (extra parameter idiom).
# NOTE(review): if the download stops short of content this still waits
# indefinitely; nothing here can observe the state of the downloader.
function progress(ofile, content,    elapsed, rate, initial, have, pct, str, prevlen, pad, z) {
    content += 0
    elapsed = 0
    rate = 0
    initial = size(ofile) + 0
    while (1) {
        have = size(ofile) + 0
        pct = sprintf("%.02f%%", (content > 0) ? have / content * 100 : 100)
        pct = sprintf("%7s", pct)
        if (elapsed > 0) {
            # Move the cursor back to the start of the previous status text.
            for (z = 0; z < length(str); z++)
                printf("\b")
            # Average rate since polling began; kept numeric for human().
            rate = (have - initial) / (elapsed + 1)
        }
        prevlen = length(str)
        str = sprintf("downloading %s (%s / %s) ~%s / s", pct, human(have), human(content), human(rate))
        printf("%s", str)
        if (elapsed > 0) {
            # If the new text is shorter, blank out the leftover tail and
            # return the cursor to the end of the new text.
            pad = prevlen - length(str)
            for (z = 0; z < pad; z++)
                printf(" ")
            for (z = 0; z < pad; z++)
                printf("\b")
        }
        if (have >= content) {
            for (z = 0; z < length(str); z++)
                printf("\b")
            break
        }
        if (system("sleep 1"))
            doexit("")
        elapsed++
    }
    # Erase the status text, then step back a further 15 columns and erase
    # those too (the width of the "getting url... " prefix printed before a
    # download starts), so that "complete" lands where that prefix began.
    for (z = 0; z < length(str); z++)
        printf(" ")
    for (z = 0; z < length(str)+15; z++)
        printf("\b")
    for (z = 0; z < 15; z++)
        printf(" ")
    for (z = 0; z < 15; z++)
        printf("\b")
    print("complete")
}
# Run wget on a URL, retrying while the server answers 429.
#   arr   - array that receives the non-header output lines, indexed from 0;
#           it is emptied first
#   url   - address to request
#   ofile - output file for wget -O; "-" sends the body to the pipe, so the
#           body lines end up in arr
#   flags - extra text appended verbatim to the wget command (e.g. " -c")
# Ends the program through doexit("") when wget fails for a reason other than
# the handled status codes.
#
# wget -S writes the response headers, indented by spaces, to stderr; with
# 2>&1 they arrive on the same pipe and are recognised by that leading space
# until the first non-indented line is seen.
#   429      -> wait (timeout) and retry with a doubled delay
#   416, 406 -> treated as "nothing more to download", not as a failure
#
# cmd and the other scratch variables are function-local (extra parameter
# idiom). This matters for cmd in particular: progress() runs other commands
# while this pipe is still open, and the loop must keep reading from, and
# finally close, the wget pipe rather than whatever ran last.
# period is intentionally NOT local, so helpers that consult the global of
# that name keep working.
function fetch(arr, url, ofile, flags,    cmd, out, i, code, content, precontent, headers, done, dobreak, qurl, qfile) {
    delete arr
    period = 16
    headers = 1
    done = 0
    # The awk program lives inside a shell single-quoted string, so a single
    # quote is written as \047. URL and file name come from scraped HTML;
    # single-quoting them (with embedded quotes escaped) keeps the shell from
    # expanding $, backticks or backslashes inside them.
    qurl = url
    gsub("\047", "\047\\\\\047\047", qurl)
    qfile = ofile
    gsub("\047", "\047\\\\\047\047", qfile)
    while (1) {
        dobreak = 1
        # Bytes already on disk; a resumed transfer announces only the rest.
        precontent = (ofile != "-") ? size(ofile) : 0
        # The leading sleep spaces requests one second apart.
        cmd = "sleep 1 && wget -q -S -U CirnOS \047" qurl "\047 -O \047" qfile "\047" flags " 2>&1"
        i = 0
        while ((cmd | getline out) > 0) {
            if (headers && match(out, /^ /)) {
                code = extract(out, "HTTP/1.1 [0-9]*", 9)
                if (code != "") {
                    if (code == 429) {
                        period = timeout(period)
                        dobreak = 0
                        break
                    }
                    if (code == 416 || code == 406) {
                        dobreak = 0
                        done = 1
                        break
                    }
                }
                content = extract(out, "Content-Length: [0-9]*", 16)
                # Only a real file can be polled for progress; with "-" the
                # size check would make wc read standard input.
                if (content != "" && ofile != "-")
                    progress(ofile, precontent + content)
                continue
            }
            headers = 0
            arr[i++] = out
        }
        if (close(cmd) && dobreak)
            doexit("")
        if (dobreak || done)
            break
    }
}
# Download the media file of one post into the current directory.
#   postid  - numeric id of the post; used as the file name prefix
#   posturl - absolute URL of the media file
# The file is first written as "tmp-<postid>-<basename>" (wget -c, so an
# interrupted transfer is resumed) and renamed to "<postid>-<basename>" once
# fetch() returns, so a final name only ever refers to a finished download.
#
# fname and q are function-local (extra parameter idiom). nil is an otherwise
# unused variable handed to fetch() as the array it fills.
function postfrom(postid, posturl,    fname, q) {
    fname = postid "-" extract(posturl, "/[^/]*$", 1)
    fetch(nil, posturl, "tmp-" fname, " -c")
    # The awk program lives inside a shell single-quoted string, so a single
    # quote is written as \047. The name is derived from scraped HTML;
    # single-quoting it (with embedded quotes escaped) keeps the shell from
    # expanding $, backticks or backslashes inside it.
    q = fname
    gsub("\047", "\047\\\\\047\047", q)
    system("mv \047tmp-" q "\047 \047" q "\047")
}
# Main loop: walk the result pages for the requested tags and download the
# media of every post that is not already present in the current directory.
# Inputs (set with -v on the awk command line):
#   tags  - "+"-separated tag query
#   booru - subdomain of sankakucomplex.com to query
BEGIN {
    baseurl = "https://" booru ".sankakucomplex.com"
    page = 1
    print("Downloading tags " tags)
    url = baseurl "/post/index.content?tags=" tags
    while (1) {
        print("Processing page " page++)
        fetch(data, url, "-", "")
        # The first line of the listing carries the query string of the next
        # page; it is empty on the last page.
        nextpage = extract(data[0], "next-page-url=\"?[^\"]*", 15)
        url = baseurl "/post/index.content" nextpage

        # Collect post ids in page order. Arrays are walked by numeric index:
        # the order of "for (k in arr)" is unspecified in awk.
        pidx = 0
        delete posts
        ndata = length(data)
        for (m = 0; m < ndata; m++) {
            postid = extract(data[m], "href=\"/post/show/[0-9]*", 17)
            if (postid)
                posts[pidx++] = postid
        }

        for (m = 0; m < pidx; m++) {
            postid = posts[m]
            printf("post %" length(pidx) "d/%d (#%s) -- ", m + 1, pidx, postid)
            # Already downloaded if any "<postid>-*" file exists. ls copes
            # with several matches, which a bare "[ -e glob ]" test does not.
            if (system("ls -d " postid "-* >/dev/null 2>&1") == 0) {
                print("complete")
                continue
            }
            printf("getting url... ")
            fetch(post, baseurl "/post/show/" postid, "-", "")

            # Find the first line describing the image element. A "sample"
            # link points at the original file and is preferred; otherwise
            # the src of the img/video tag is used.
            posturl = ""
            npost = length(post)
            for (lineidx = 0; lineidx < npost; lineidx++) {
                line = post[lineidx]
                if (!match(line, /id=\"?image/))
                    continue
                if (match(line, /<a.*class=\"sample/))
                    posturl = extract(line, "href=\"//[^\"?]*", 6)
                else if (match(line, /<(img|video)/))
                    posturl = extract(line, "src=\"//[^\"?]*", 5)
                if (posturl != "")
                    break
            }
            if (posturl != "")
                postfrom(postid, "https:" posturl)
            else
                # Terminate the status line instead of leaving it dangling.
                print("no media found")
        }
        if (nextpage == "")
            doexit("done")
    }
}
'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment