Skip to content

Instantly share code, notes, and snippets.

Created March 26, 2011 06:33
Show Gist options
  • Save anonymous/888078 to your computer and use it in GitHub Desktop.
Downloads all the files on a Tokyo Toshokan search page.
#!/usr/bin/env python
#Instructions:
#Download and install python if you have not already. It comes default on most if not all *nix systems and OS X.
#Click "raw" in the upper right part of this box. Save it as ttdownload.py in any folder that you want.
#Download http://www.crummy.com/software/BeautifulSoup/download/3.x/BeautifulSoup-3.0.8/BeautifulSoup.py and save it in the same folder as the previous script.
#Open up a terminal in the folder that you have downloaded everything from.
#Type "python ttdownload.py".
#Search for the files that you want on Tokyo Toshokan.
#Paste the url of the Tokyo Toshokan search page when asked.
#Type the folder name that you want all of the .torrent files saved to (if left blank, it will default to the date and time)
#Now all of the .torrent files are saved in the folder specified.
#Watch some animu.
import re
import os
import sys
import urlparse
import urllib
from datetime import datetime
from BeautifulSoup import BeautifulSoup
#
# Make some colors for a better experience in *nix
#
class bcolors:
    """ANSI escape sequences for coloured terminal output on *nix."""
    HEADER = '\033[95m'   # magenta
    OKBLUE = '\033[94m'   # blue
    OKGREEN = '\033[92m'  # green
    WARNING = '\033[93m'  # yellow
    FAIL = '\033[91m'     # red
    ENDC = '\033[0m'      # reset colour/attributes

    def disable(self):
        """Blank every code on this instance (for terminals without ANSI support)."""
        for code in ('HEADER', 'OKBLUE', 'OKGREEN', 'WARNING', 'FAIL', 'ENDC'):
            setattr(self, code, '')
#
# Given a tokyo toshokan page, this function will get all the links,
# and pass the urls to getDownload(url) which will determine if they are valid
# then it returns all valid links in their .torrent form
#
def getTorrents(url):
print "reading page..."
try:
gatherer = urllib.urlopen(url)
html = gatherer.read()
gatherer.close()
soup = BeautifulSoup("".join(html))
links = soup.findAll(name="a",attrs={"rel":"nofollow","type":"application/x-bittorrent"})
torrents = {}
for link in links:
name = link.string.encode("ascii","ignore")
try:
url = getDownload(link["href"])
torrents[url] = name
except:
print bcolors.WARNING + "could not find a .torrent file for " + name + bcolors.ENDC
pass
return torrents
except:
sys.exit(bcolors.FAIL+"Malformed URL, quitting"+bcolors.ENDC)
#
# Given a link (offsite from tokyotosho) this function will
# try to locate the .torrent file
#
def getDownload(url):
    """Resolve *url* (an offsite link from Tokyo Toshokan) to a .torrent URL.

    Returns the .torrent URL, or None when the link does not lead to a
    torrent file — callers must treat None as "not found".
    """
    url = url.strip()
    # Direct link to a .torrent file.  The dot is escaped: the old
    # pattern ".torrent" also matched names like "xtorrent".
    if re.search(r"\.torrent", url):
        return url
    # NyaaTorrents: an info page becomes a download link by swapping the
    # "page" query value.  (The old extra test for "nyaatorrents" was
    # redundant — it already matches "nyaa".)
    elif re.search("nyaa", url):
        if re.search("page=download", url):
            return url
        return url.replace("torrentinfo", "download")
    # Sites that redirect: fetch the resource and inspect its MIME type.
    else:
        torrent = urllib.urlopen(url)
        try:
            mimeType = torrent.info().gettype()
        finally:
            torrent.close()  # the old code leaked this connection
        if re.search("application/x-bittorrent", mimeType):
            return url
        return None  # explicit: no torrent file found
#
# Given a .torrent file, this function downloads it to the right folder
#
def download(link, name, folder="torrents_from_py"):
    """Fetch *link* and save it as <folder>/<name>.torrent.

    Creates *folder* if it does not exist.  Path separators in *name*
    are replaced with "_" so a torrent title containing "/" cannot
    escape the target folder (the old code crashed on such names).
    """
    if not os.path.exists(folder):
        os.makedirs(folder)
    u = urllib.urlopen(link)
    try:
        data = u.read()
    finally:
        u.close()  # close even if read() fails
    safe = name.replace("/", "_").replace(os.sep, "_")
    f = open(os.path.join(folder, safe + ".torrent"), "wb")
    try:
        f.write(data)
    finally:
        f.close()
#
# The body of our program
#
page = ""
folder = ""
if __name__ == '__main__':
# ask the user for the page that they want
page = raw_input("What url: \n")
print "Starting query..."
# get the list of torrents
torrents = getTorrents(page)
print bcolors.OKGREEN + str(len(torrents))+" torrents found" + bcolors.ENDC
if len(torrents)>0:
print "torrents found for the following items"
print "="*50
#print the valid list
for name in torrents.itervalues():
print bcolors.HEADER + name +bcolors.ENDC
print "="*50
# get the folder that they want to save it in
now = datetime.now()
pre = str(now.year) + "_" + str(now.month) + "_" + str(now.day) + "_" + str(now.hour) + "_" + str(now.minute) + "_" + str(now.second)
folder = raw_input("Folder name: (leave blank to use "+pre+")\n")
if folder.strip() == "":
folder = pre
#download all the files
print "="*50
for key,value in torrents.iteritems():
print bcolors.OKGREEN + "downloading:",value
try:
download(key,value,folder)
except:
print bcolors.WARNING + "Failed downloading " +value + bcolors.ENDC
print bcolors.ENDC
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment