Last active
July 8, 2019 19:12
-
-
Save alternativecutegirls/40e070810ff42faeef206ec2bf6e2d6c to your computer and use it in GitHub Desktop.
4chan file/webm downloader/pipe it to mpv etc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json, re, sys, os,time | |
from urllib.request import urlopen as u | |
'''
it searches for all files, not only webms. (jpg, png, gif, webm).
usage examples:
Searches for 'Ricardo Thread' in the board wsg and print all file links.
Useful for piping to mpv and watching them. (e.g.)
4chanwebm.py board 'bla bla' --print_links | xargs mpv
python 4chanwebm.py wsg 'Ricardo Thread' --print_links
Searches for 'trap thread' in board gif and saves all files to current directory.
python 4chanwebm.py gif 'trap thread' --save
Prints all threads in the board /a/.
python 4chanwebm.py a --catalog
'''
# Terminal color escape codes. With --no-colors every attribute is an empty
# string so output stays plain (useful when piping links to xargs/mpv).
if "--no-colors" in sys.argv:
    class bc:
        HEADER = ''
        OKBLUE = ''
        BLACKFG = ''  # bug fix: was missing; printTitleCom reads bc.BLACKFG
        OKGREEN = ''
        WARNING = ''
        FAIL = ''
        ENDC = ''
        BOLD = ''
        UNDERLINE = ''
else:
    class bc:
        HEADER = '\033[95m'
        OKBLUE = '\033[38;5;192m'
        BLACKFG = '\033[38;5;236m'
        OKGREEN = '\033[92m'
        WARNING = '\033[93m'
        FAIL = '\033[91m'
        ENDC = '\033[0m'
        BOLD = '\033[1m'
        UNDERLINE = '\033[4m'
#board = str(''.join(re.findall("\/(\w+)\/", url))) | |
def cleanStr(raw_str):
    """Strip 4chan HTML from a comment string.

    Turns <br> into newlines, removes all remaining tags, and unescapes
    the '&gt;' entity used by the API for quote/greentext markers.
    """
    # Collapse double line breaks before converting the rest to '\n'.
    text = re.sub('<br><br>', '<br>', raw_str)
    text = re.sub('<br>', '\n', text)
    # Drop any remaining HTML tags (non-greedy so adjacent tags both match).
    text = re.sub(r'<.*?>', '', text)
    # Bug fix: the API escapes '>' as '&gt;'; the old pattern '\>\;' matched
    # the literal text '>;' and therefore never unescaped anything.
    return re.sub('&gt;', '>', text)
def printTitleCom(catal):
    """Print one line per thread in the catalog: the subject when present,
    otherwise the first 75 characters of the cleaned comment."""
    for page in catal:
        for bread in page["threads"]:
            try:
                print(bc.OKBLUE + bc.BLACKFG + "*" + bc.ENDC + bread['sub'])
            except KeyError:
                # No subject: fall back to the comment. Robustness fix:
                # 'com' may also be absent; the old code crashed then.
                comment = cleanStr(bread.get('com', ''))
                print(comment[:75])
def print_shit(bread):
    """Print a short summary of a thread dict: subject, comment, file and
    reply counts, filename of the OP image, and thread number."""
    try:
        print(bread['sub'])
    except KeyError:
        pass
    try:
        comm = cleanStr(bread['com'])
        # Bug fix: the old code printed comm.format(3, 5), which raised
        # IndexError/KeyError whenever the comment contained '{' or '}'.
        print(bc.OKBLUE + "COM:", comm + bc.ENDC)
    except KeyError:
        pass
    print("FILES:", bread['images'])
    print("REPLIES:", bread['replies'])
    # Bug fix: this line was mislabeled "REPLIES:" although it prints the filename.
    print("FILENAME:", bread['filename'])
    print("NUM:", bread['no'])
# Searches in thread title for string | |
def searchBread(catal, string):
    """Search thread subjects in the catalog for a regex.

    Prints a summary of the first matching thread and returns its thread
    number; returns None when nothing matches.
    """
    for page in catal:
        for bread in page["threads"]:
            try:
                if re.search(string, cleanStr(bread['sub'])):
                    # Bug fix: print_shit takes only the thread dict; the
                    # old call print_shit(bread, page, thread) raised
                    # TypeError on every match.
                    print_shit(bread)
                    return bread['no']
            except KeyError:
                # Thread has no subject; keep scanning.
                continue
def searchBreadCom(catal, string):
    """Search thread comments and subjects in the catalog for a regex.

    Returns the thread number ('no') of the first match, or None.

    Side effect: the last examined thread dict is stashed on
    searchBreadCom.bread, which the script reads afterwards to print a
    summary of the matched thread.
    """
    for page in catal:
        for thread in page["threads"]:
            searchBreadCom.bread = thread
            # Check the comment first, then the subject; either key may be
            # absent on any given thread, so probe each independently.
            for key in ('com', 'sub'):
                try:
                    if re.search(string, thread[key]):
                        return thread['no']
                except KeyError:
                    continue
def saveShit(trUrl):
    """Download every file attached to the thread at trUrl (a 4chan thread
    JSON URL) into the current directory, skipping files already present.

    With --print_links on the command line, the file URLs are printed
    instead and the script exits. Relies on the module-level globals
    `board` (board name) and `u` (urlopen alias).
    """
    skipped = 0
    bread = json.loads(u(trUrl).read())
    # Posts carrying an 'ext' key have an attachment; build its CDN URL.
    llist = ["https://i.4cdn.org/" + board + "/" + str(post['tim']) + post['ext']
             for post in bread["posts"]
             if post.get("ext")]
    if "--print_links" in sys.argv:
        print('\n'.join(llist))
        sys.exit()
    # Hoisted out of the loop: one directory scan with O(1) membership tests,
    # instead of an os.listdir() call per file.
    existing = set(os.listdir())
    for dex, link in enumerate(llist, start=1):
        fn = re.sub(r"https://i\.4cdn\.org/(\w+)/", "", link)
        if fn in existing:
            skipped = skipped + 1
            print(" %d/%d SKIPPED" % (skipped, len(llist)), end="\r")
            time.sleep(0.05)
            continue
        # Bug fix: the total was printed as len(llist)-1, off by one.
        print(bread["posts"][0]['semantic_url'], fn, dex, "OUT OF", len(llist))
        b = u(link).read()
        with open(fn, 'wb') as f:
            f.write(b)
# print('\n'.join(llist)) | |
# ---- command-line entry point ----
# argv: <board> <search regex> [--save | --catalog | --print_links | --no-colors]
# The catalog fetch was identical in both branches, so it is hoisted here.
board = sys.argv[1]
url = "https://a.4cdn.org/" + board + "/catalog.json"
catal = json.loads(u(url).read())

if "--save" in sys.argv:
    # Dead-code fix: the old "-xom" check ran the same call in both branches.
    cc = searchBreadCom(catal, sys.argv[2])
    if cc is None:
        print("Nothing found")
        sys.exit()
    # Download every attachment of the matched thread.
    saveShit("https://a.4cdn.org/%s/thread/%s.json" % (board, cc))
else:
    if "--catalog" in sys.argv:
        # Just list every thread on the board, then stop.
        printTitleCom(catal)
        sys.exit()
    cc = searchBreadCom(catal, sys.argv[2])
    if cc is None:
        print("Nothing found")
        sys.exit()
    trString = "https://a.4cdn.org/%s/thread/%s.json" % (board, cc)
    if "--print_links" in sys.argv:
        # saveShit prints the links and calls sys.exit() itself.
        saveShit(trString)
    print("")
    if len(sys.argv) < 4:
        # No extra flags: show a summary of the last thread examined.
        print_shit(searchBreadCom.bread)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment