Skip to content

Instantly share code, notes, and snippets.

@loadletter
Last active August 29, 2015 14:04
Show Gist options
  • Save loadletter/59991de51341c7907ef9 to your computer and use it in GitHub Desktop.
Save loadletter/59991de51341c7907ef9 to your computer and use it in GitHub Desktop.
Requires BeautifulSoup and requests (python-requests & python-beautifulsoup on Debian).
# Usage: script.py removed.csv outputdirectory
import urllib, os, sys, random, time
from BeautifulSoup import BeautifulSoup
import requests
# Google web-cache endpoint; the %s slot receives a URL-encoded query string.
GOOGLE_CACHE = "http://webcache.googleusercontent.com/search?%s"
# Cache search term for a gallery's torrent listing page:
# %i is the gallery id, %s is the gallery token.
TRACKER_WIN = "cache:http://g.e-hentai.org/gallerytorrents.php?gid=%i&t=%s"
# Only links whose href starts with this prefix are fetched as torrents.
EH_TRACKER = "http://ehtracker.org/t/"
# Shared HTTP session used for every request. Session() takes no keyword
# arguments in requests >= 1.0, so the User-Agent header is set on the
# session's header mapping after construction.
SESS = requests.Session()
SESS.headers.update({"User-Agent": "Mozilla/5.0 (X11; Linux i686; rv:31.0) Gecko/20100101 Firefox/30.0"})
# Directory the .torrent files are written to (second command-line argument).
OUT_PATH = sys.argv[2]
def cache_url(gid, tok):
    """Build the Google-cache URL for one gallery's torrent listing page.

    gid -- numeric gallery id (formatted with %i into TRACKER_WIN)
    tok -- gallery token string

    Returns GOOGLE_CACHE with the URL-encoded ``q=<cache term>`` query
    substituted in.
    """
    cached_page = TRACKER_WIN % (gid, tok)
    query_string = urllib.urlencode({"q": cached_page})
    return GOOGLE_CACHE % query_string
def load_csv(filename):
    """Read (gallery id, token) pairs from the first column of a CSV file.

    The first comma-separated field of each line is expected to look like
    ``<gid>/<token>``, where ``<gid>`` parses as an integer. Lines that do
    not match (headers, blank lines, malformed entries) are skipped.

    filename -- path of the CSV file to read

    Returns a list of ``(int gid, str token)`` tuples in file order and
    prints how many were loaded.
    """
    stuff = []
    with open(filename) as f:
        for line in f:
            # Strip so a line without any comma does not leave its trailing
            # newline glued to the token.
            gidtok = line.split(',')[0].strip()
            parts = gidtok.split('/')
            try:
                entry = (int(parts[0]), parts[1])
            except (ValueError, IndexError):
                # Non-numeric id or missing "/token" part: not a gallery row.
                continue
            stuff.append(entry)
    print("Loaded from csv: %d" % len(stuff))
    return stuff
def _safe_name(text):
    """Return *text* with path separators and NULs replaced by underscores."""
    for bad in (u"/", u"\\", u"\0"):
        text = text.replace(bad, u"_")
    return text


def download_torrent(data, gid):
    """Download every tracker torrent linked from a cached listing page.

    data -- HTML of the torrent listing page
    gid  -- integer gallery id, used in the file name and console output

    Each ``<a>`` whose href starts with EH_TRACKER is fetched through SESS.
    On HTTP 200 the body is written to
    ``OUT_PATH/<gid>-<n>-<link text>.torrent``, where ``n`` counts the
    successful downloads for this gallery starting at 1. Any other status
    is reported and the link is skipped.
    """
    soup = BeautifulSoup(data)
    c = 1
    for link in soup.findAll('a'):
        # Anchors without an href would raise KeyError on link['href'].
        href = link.get('href')
        if not href or not href.startswith(EH_TRACKER):
            continue
        req = SESS.get(href)
        if req.status_code != 200:
            print(u"%s Error: %s" % (gid, req.status_code))
            continue
        # The link text comes from a remote page; keep it from introducing
        # extra path components below OUT_PATH.
        name = u"%i-%i-%s.torrent" % (gid, c, _safe_name(link.text))
        with open(os.path.join(OUT_PATH, name), 'wb') as outf:
            outf.write(req.content)
        c += 1
        print(u"%s Downloaded: %s" % (gid, link.text))
def main():
    """Fetch the cached torrent page of every gallery in the CSV and save its torrents.

    Reads the gallery list from the file named by the first command-line
    argument, requests each gallery's Google-cache URL through SESS and, on
    HTTP 200, hands the page to download_torrent(). A random pause follows
    every request.
    """
    galleries = load_csv(sys.argv[1])
    for gid, tok in galleries:
        # Direct call instead of the deprecated apply() builtin.
        req = SESS.get(cache_url(gid, tok))
        if req.status_code == 200:
            download_torrent(req.text, gid)
            print("%s Data:" % gid)
            time.sleep(random.randint(0, 3))
        else:
            print("%s Error: %s" % (gid, req.status_code))
            # NOTE(review): the original indentation was lost; this longer
            # pause is assumed to belong to the error branch - confirm.
            time.sleep(random.randint(0, 10))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment