@sfan5 · Last active March 20, 2016
Download all your fav'ed @catgirls_bot pictures and their original images (where possible).
There are three parts: a browser-console snippet that exports your favorites to favs.json, pixivdl.py which fetches a single pixiv work by ID, and a downloader script that reads favs.json and grabs the Twitter images plus the originals from pixiv, danbooru and yande.re.
// Go to https://twitter.com/favorites and scroll all the way down until there are no more fav'd @catgirls_bot tweets
// maybe like this:
// window.temp=function(){window.scrollTo(0,document.body.scrollHeight);setTimeout(window.temp,750)};window.temp()
// window.temp=function(){}; // stop like this
// Collect tweet ID, expanded link and image URL from each fav'd tweet
// (DOM traversal matches the Twitter timeline layout as of early 2016)
var _q = document.getElementById('stream-items-id');
var _i;
var _o = [];
for(_i = 0; _i < _q.children.length; _i++) {
    var _e = _q.children[_i];
    if(_e.children[0].getAttribute('data-screen-name') == 'catgirls_bot') {
        var _a = _e.children[0].getAttribute('data-tweet-id');
        _e = _e.children[0].children[1];
        var _l = _e.children[1].children[0].children[0].getAttribute('data-expanded-url');
        var _p = _e.children[2].children[0].children[0].children[0].getAttribute('data-image-url') + ":large";
        _o.push({id: _a, link: _l, picture: _p});
    }
}
// Dump the result as JSON into a new window
var _w = window.open();
if(_w) {
    _w.document.open();
    _w.document.write('<h1>Copy this and save it into <i>favs.json</i></h1>');
    _w.document.write('<pre>');
    _w.document.write(JSON.stringify(_o, null, ' '));
    _w.document.write('</pre>');
    _w.document.close();
}
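The snippet produces a JSON array of {id, link, picture} objects. A favs.json entry looks roughly like this (the values below are made up for illustration):

[
 {
  "id": "711610561873838080",
  "link": "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=12345678",
  "picture": "https://pbs.twimg.com/media/XxXxXxXxXxX.jpg:large"
 }
]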
pixivdl.py — downloads a single pixiv work by ID (invoked by the downloader script further below):

#!/usr/bin/env python3
import sys
import urllib.request
import re
import getopt
##################
PHPSESSID = "your_phpsessid_here" # your pixiv session cookie
##################
def gopt(opts, n):
    # Return the value of the last occurrence of option n, or None
    v = None
    for opt in opts:
        if opt[0] == n:
            v = opt[1]
    return v

r_image = re.compile(r'<img alt="[^"]+" width="[0-9]+" height="[0-9]+" data-src="([a-z0-9:/.\-_]+)" class="original-image">')
r_manga = re.compile(r'<a href="member_illust\.php\?mode=manga&amp;illust_id=[0-9]+"')
r_mangaimage = re.compile(r'<img src="([a-z0-9:/.\-_]+)"')

try:
    opts, args = getopt.getopt(sys.argv[1:], "o:q", ['no-manga'])
except getopt.GetoptError as e:
    print(str(e))
    exit(1)
if len(args) < 1:
    print("Usage: %s [-q] [--no-manga] [-o FILENAME] <pixiv ID> [2nd argument]" % sys.argv[0])
    print("The 2nd argument is usually the page number.")
    exit(1)
def urlopen(url, headers):
    # Fetch a URL with a browser User-Agent and the pixiv session cookie
    h = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0",
        "Cookie": "PHPSESSID=%s; p_ab_id=4; a_type=0" % PHPSESSID
    }
    h.update(headers)
    r = urllib.request.Request(url, headers=h)
    return urllib.request.urlopen(r)
pixiv_id = int(args[0])
arg2 = "" if len(args) < 2 else args[1].strip()
page = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=%d" % pixiv_id
r = urlopen(page, {})
data = r.read().decode("utf-8", "ignore")
r.close()
if r_manga.search(data): # Manga
    if ('--no-manga', '') in opts:
        if ('-q', '') not in opts:
            print("Illustration is a manga and --no-manga given, exiting.")
        exit(1)
    if arg2 == "":
        if ('-q', '') not in opts:
            print("Illustration is a manga but no page number given, downloading first page by default")
            print("Set the page number by giving the script a second argument")
        page_no = 0
    else:
        page_no = int(arg2) - 1
    page = "http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=%d&page=%d" % (pixiv_id, page_no)
    r = urlopen(page, {})
    data = r.read().decode("utf-8", "ignore")
    r.close()
    m = r_mangaimage.search(data)
    if not m:
        print("Couldn't find URL")
        exit(1)
    imgurl = m.group(1)
else: # Single image
    m = r_image.search(data)
    if not m:
        print("Couldn't find URL")
        exit(1)
    imgurl = m.group(1)
if ('-q', '') not in opts:
    print("Downloading %s..." % imgurl)
filename = imgurl.split("/")[-1]
if gopt(opts, '-o'):
    filename = gopt(opts, '-o') + "." + filename.split(".")[-1]
f = open(filename, "wb")
r = urlopen(imgurl, {"Referer": page}) # pixiv's image server checks the Referer
while True:
    data = r.read(256 * 1024) # 256 KiB chunks
    if not data:
        break
    f.write(data)
r.close()
f.close()
if ('-q', '') not in opts:
    print("Saved work %d as %s" % (pixiv_id, filename))
The downloader script — reads favs.json and fetches everything:

#!/usr/bin/env python3
import json
import urllib.request
import re
import subprocess
import multiprocessing

infile = "favs.json"
outdir = "/tmp/twpics/" # must exist before running
nworkers_twit = 4
nworkers_orig = 5

r_ext = re.compile(r'\.([a-z]+):large$')
r_pixiv = re.compile(r'^http:\/\/www\.pixiv\.net\/member_illust\.php\?mode=medium&illust_id=([0-9]+)$')
r_danbooru = re.compile(r'^https?:\/\/danbooru\.donmai\.us\/posts\/[0-9]+$')
r_yandere = re.compile(r'^https?:\/\/yande\.re\/post\/show\/[0-9]+$')
r_yandere_orig = re.compile(r'<a class="original-file-changed" id="highres" href="https:\/\/files\.yande\.re\/image\/([0-9a-z]{32})\/[^"]+\.([a-z]{,3})">')
def urlopen(url, headers=None):
    # Fetch a URL with a browser User-Agent (None default avoids a mutable default argument)
    h = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0"
    }
    if headers:
        h.update(headers)
    r = urllib.request.Request(url, headers=h)
    return urllib.request.urlopen(r)

def fdcopy(fp1, fp2):
    # Copy fp1 into fp2 in 1 MiB chunks
    while True:
        data = fp1.read(1024 * 1024)
        if not data:
            return
        fp2.write(data)
def download_original(link, filebase):
    if r_pixiv.search(link):
        # pixiv: delegate to pixivdl.py (adjust the path below)
        pixiv_id = r_pixiv.search(link).group(1)
        p = subprocess.Popen(["/path/to/pixivdl.py", "-q", "--no-manga", "-o", filebase, pixiv_id])
        p.wait()
        return (p.returncode == 0)
    elif r_danbooru.search(link):
        # danbooru: the JSON API tells us the file URL
        s = urlopen(link + ".json")
        data = json.loads(s.read().decode("utf-8", "ignore"))
        s.close()
        if "file_url" not in data: # deleted image
            return False
        link = data["file_url"]
        s = urlopen("http://danbooru.donmai.us" + link)
        f = open(filebase + "." + link.split(".")[-1], "wb")
        fdcopy(s, f)
        s.close()
        f.close()
        return True
    elif r_yandere.search(link):
        # yande.re: scrape the highres link from the post page
        s = urlopen(link)
        data = s.read().decode("utf-8", "ignore")
        s.close()
        m = r_yandere_orig.search(data)
        if not m:
            return False
        s = urlopen("https://files.yande.re/image/%s/a.%s" % m.groups())
        f = open(filebase + "." + m.group(2), "wb")
        fdcopy(s, f)
        s.close()
        f.close()
        return True
    else:
        return False
def worker_twit(queue):
    # Downloads the twitter image in :orig quality
    for e in iter(queue.get, "STOP"):
        print("worker_twit: " + e["id"])
        picext = r_ext.search(e["picture"]).group(1)
        s = urlopen(e["picture"].replace(":large", ":orig"))
        f = open(outdir + e["id"] + "." + picext, "wb")
        fdcopy(s, f)
        s.close()
        f.close()

def worker_orig(queue):
    # Tries to download the original image, otherwise notes the URL in a .txt file
    for e in iter(queue.get, "STOP"):
        print("worker_orig: " + e["id"])
        if not download_original(e["link"], outdir + e["id"] + "_o"):
            f = open(outdir + e["id"] + "_o.txt", "w")
            f.write(e["link"])
            f.write("\n")
            f.close()
f = open(infile, "r")
data = json.load(f)
f.close()

queue_twit = multiprocessing.SimpleQueue()
queue_orig = multiprocessing.SimpleQueue()
for i in range(nworkers_twit):
    multiprocessing.Process(target=worker_twit, args=(queue_twit,)).start()
for i in range(nworkers_orig):
    multiprocessing.Process(target=worker_orig, args=(queue_orig,)).start()
for e in data:
    queue_twit.put(e)
    if e['link'] is not None:
        queue_orig.put(e)
for i in range(nworkers_twit):
    queue_twit.put("STOP")
for i in range(nworkers_orig):
    queue_orig.put("STOP")