Skip to content

Instantly share code, notes, and snippets.

@alternativecutegirls
Last active July 8, 2019 19:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alternativecutegirls/40e070810ff42faeef206ec2bf6e2d6c to your computer and use it in GitHub Desktop.
Save alternativecutegirls/40e070810ff42faeef206ec2bf6e2d6c to your computer and use it in GitHub Desktop.
4chan file/webm downloader/pipe it to mpv etc
import json, re, sys, os,time
from urllib.request import urlopen as u
'''
It searches for all files, not only webms (jpg, png, gif, webm).
Usage examples:
Search for 'Ricardo Thread' in the board wsg and print all file links.
Useful for piping to mpv and watching them, e.g.
4chanwebm.py board 'bla bla' --print_links | xargs mpv
python 4chanwebm.py wsg 'Ricardo Thread' --print_links
Search for 'trap thread' in the board gif and save all files to the current directory.
python 4chanwebm.py gif 'trap thread' --save
Print all threads in the board /a/.
python 4chanwebm.py a --catalog
'''
class bc:
    """ANSI escape sequences used to colorize terminal output.

    The attributes are meant to be concatenated directly into printed
    strings.  When ``--no-colors`` is on the command line every one of them
    is replaced by the empty string (see below), so callers never need to
    check which mode is active.
    """
    HEADER = '\033[95m'
    OKBLUE = '\033[38;5;192m'
    BLACKFG = '\033[38;5;236m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'


if "--no-colors" in sys.argv:
    # Blank the codes by iterating over the class itself instead of keeping a
    # second hand-written class: a separate colorless class can silently miss
    # an attribute (BLACKFG was missing), which crashes at first use.
    for _code_name in [name for name in vars(bc) if name.isupper()]:
        setattr(bc, _code_name, '')
#board = str(''.join(re.findall("\/(\w+)\/", url)))
def cleanStr(raw_str):
    """Convert an HTML fragment (thread subject or comment) to plain text.

    Steps, in order:
      1. each ``<br><br>`` pair is collapsed to a single ``<br>``;
      2. every remaining ``<br>`` becomes a newline;
      3. all other ``<...>`` tags are removed;
      4. HTML character references (``&gt;``, ``&amp;``, ``&#039;``,
         ``&quot;``, ...) are decoded.

    Args:
        raw_str: the HTML text to clean.

    Returns:
        The plain-text version of ``raw_str``.
    """
    # Imported here so the function does not depend on the file's import list.
    import html

    collapsed = raw_str.replace('<br><br>', '<br>')
    with_newlines = collapsed.replace('<br>', '\n')
    without_tags = re.sub(r'<.*?>', '', with_newlines)
    # Decode entities only after the tags are gone, so an escaped "&lt;b&gt;"
    # typed by a poster is shown as text rather than stripped as markup.
    return html.unescape(without_tags)
def printTitleCom(catal):
    """Print one line per thread of a board catalog.

    For a thread that has a subject (``sub``) the subject is printed behind a
    colored ``*`` marker.  Otherwise the first 75 characters of its cleaned-up
    comment (``com``) are printed.

    Args:
        catal: the decoded catalog JSON, a sequence of page dicts that each
            hold their threads under the ``"threads"`` key.
    """
    for page in catal:
        for bread in page["threads"]:
            if 'sub' in bread:
                print(bc.OKBLUE + bc.BLACKFG + "*" + bc.ENDC + bread['sub'])
            else:
                # A thread may carry neither subject nor comment (file-only
                # post); print an empty line for it instead of raising
                # KeyError on the missing 'com' key.
                comment = cleanStr(bread.get('com', ''))
                print(comment[:75])
def print_shit(bread):
    """Print a short summary of one catalog thread.

    Prints, in order: the subject (if any), the cleaned-up comment (if any,
    colorized and prefixed with ``COM:``), the file count, the reply count,
    the original file name (if any) and the thread number.

    Args:
        bread: one thread dict taken from the catalog JSON.
    """
    if 'sub' in bread:
        print(bread['sub'])
    if 'com' in bread:
        comm = cleanStr(bread['com'])
        # The comment is user-supplied text and must be printed verbatim:
        # running it through str.format() would crash on a lone "{" and could
        # silently drop comments containing "{name}"-style text.
        print(bc.OKBLUE + "COM:", comm + bc.ENDC)
    print("FILES:", bread['images'])
    print("REPLIES:", bread['replies'])
    if 'filename' in bread:
        print("FILENAME:", bread['filename'])
    print("NUM:", bread['no'])
# Searches in thread title for string
def searchBread(catal, string):
    """Find the first catalog thread whose subject matches a pattern.

    Threads are visited page by page in catalog order.  Threads without a
    subject (``sub``) are skipped.  On the first match the thread summary is
    printed with ``print_shit`` and the search stops.

    Args:
        catal: the decoded catalog JSON, a sequence of page dicts that each
            hold their threads under the ``"threads"`` key.
        string: regular expression searched for (``re.search``) in the
            cleaned-up subject.

    Returns:
        The matching thread's number (``no``), or ``None`` if nothing matched.
    """
    for page in catal:
        for bread in page["threads"]:
            subject = bread.get('sub')
            if subject is None:
                continue
            if re.search(string, cleanStr(subject)):
                # print_shit() takes only the thread dict.
                print_shit(bread)
                return bread['no']
    return None
def searchBreadCom(catal, string):
    """Return the number of the first thread whose comment or subject matches.

    The catalog is walked page by page, thread by thread.  For each thread
    the raw ``com`` field is tested first and the ``sub`` field second, both
    with ``re.search(string, ...)``; a missing field is simply skipped.

    Side effect: the thread being examined is stored on the function object
    as ``searchBreadCom.bread``, so after a successful call it refers to the
    matching thread.

    Args:
        catal: the decoded catalog JSON, a sequence of page dicts that each
            hold their threads under the ``"threads"`` key.
        string: regular expression to search for.

    Returns:
        The matching thread's ``no`` value, or ``None`` when nothing matches.
    """
    for page_entry in catal:
        for candidate in page_entry["threads"]:
            # Expose the current thread to code outside this function.
            searchBreadCom.bread = candidate
            for field in ('com', 'sub'):
                try:
                    if re.search(string, candidate[field]):
                        return candidate['no']
                except KeyError:
                    # Field absent on this thread: try the next one.
                    continue
def saveShit(trUrl):
    """Download every file attached to a thread, or just print the links.

    Fetches the thread JSON at ``trUrl`` and builds one ``i.4cdn.org`` URL
    for each post that has an ``ext`` field, using the module-level ``board``
    name.  If ``--print_links`` is in ``sys.argv`` the URLs are printed one
    per line and the process exits.  Otherwise every file is saved into the
    current directory under its ``<tim><ext>`` name; names that already exist
    there are skipped and counted.

    Args:
        trUrl: URL of the thread's JSON document.
    """
    with u(trUrl) as response:
        bread = json.loads(response.read())
    llist = ["https://i.4cdn.org/" + board + "/" + str(post['tim']) + post['ext']
             for post in bread["posts"]
             if post.get("ext")]
    if "--print_links" in sys.argv:
        print('\n'.join(llist))
        sys.exit()

    skipped = 0
    total = len(llist)
    for dex, link in enumerate(llist, start=1):
        # The local file name is the last path component, "<tim><ext>".
        fn = link.rsplit("/", 1)[-1]
        if os.path.exists(fn):
            skipped += 1
            print(" %d/%d SKIPPED" % (skipped, total), end="\r")
            # NOTE(review): presumably a short pause so the in-place counter
            # stays readable; kept as in the original.
            time.sleep(0.05)
            continue
        # dex runs from 1 to total, so the total is reported as-is.
        print(bread["posts"][0]['semantic_url'], fn, dex, "OUT OF", total)
        # Read the whole body before opening the output file, so a failed
        # download does not leave an empty file that later runs would skip.
        with u(link) as response:
            data = response.read()
        with open(fn, 'wb') as f:
            f.write(data)
def main():
    """Command-line entry point.

    ``sys.argv[1]`` is the board name and ``sys.argv[2]`` the regular
    expression to look for in thread comments and subjects.  Flags:

      --save         download every file of the first matching thread
      --print_links  print the file URLs of the first matching thread
      --catalog      list all threads of the board (ignored with --save)

    With neither ``--save`` nor ``--print_links``, a summary of the matching
    thread is printed when fewer than four arguments were given.
    """
    # saveShit() builds its file URLs from this module-level name.
    global board

    usage = ("usage: 4chanwebm.py BOARD [PATTERN] "
             "[--save | --print_links | --catalog] [--no-colors]")
    saving = "--save" in sys.argv
    want_catalog = not saving and "--catalog" in sys.argv
    # Fail with a usage line instead of an IndexError when the board or the
    # search pattern is missing (the catalog listing needs no pattern).
    if len(sys.argv) < 2 or (not want_catalog and len(sys.argv) < 3):
        sys.exit(usage)

    board = sys.argv[1]
    url = "https://a.4cdn.org/" + board + "/catalog.json"
    with u(url) as response:
        catal = json.loads(response.read())

    if want_catalog:
        printTitleCom(catal)
        sys.exit()

    # NOTE: the "-xom" flag used to select between two identical calls, so it
    # never had any effect; comment and subject are always both searched.
    cc = searchBreadCom(catal, sys.argv[2])
    if cc is None:
        print("Nothing found")
        sys.exit()
    trString = "https://a.4cdn.org/%s/thread/%s.json" % (board, cc)

    if saving:
        saveShit(trString)
        return

    if "--print_links" in sys.argv:
        # saveShit() prints the links and exits the process in this mode.
        saveShit(trString)
    print("")
    if len(sys.argv) < 4:
        print_shit(searchBreadCom.bread)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment