# neoadventist/getNaruto.py

## getNaruto.py
#BELIEVE IT!!
# Banner printed once, before anything else runs.
print("Jesus Saves")

#import libraries
import sys;
import time;
import urllib;
from urllib import FancyURLopener;
from random import choice;

# Episode range, taken from the command line: argv[1] is the first episode;
# when it is the only argument the range is that single episode, otherwise
# argv[2] is the last episode.
num = int(sys.argv[1])
numEnd = num if len(sys.argv) == 2 else int(sys.argv[2])

# Pool of user agents; one is drawn at random for each opener so that the
# server doesn't kick us out!
user_agents = [
	'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
	'Opera/9.25 (Windows NT 5.1; U; en)',
	'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
	'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
	'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
	'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9',
]

def getWebPage(episode):
	"""Fetch the page for *episode* and save it to the local file 'file'.

	The opener class is defined inside the function, so its ``version`` (the
	user agent string) is drawn from ``user_agents`` anew on every call.

	Returns the name of the file the page was written to.
	"""
	pageUrl = 'http://www.narutonine.com/NarutoEpisode' + str(episode) + 'EnglishDubbed.html'
	print("Naruto URL: " + pageUrl)

	class RandomAgentOpener(FancyURLopener, object):
		# Evaluated each time getWebPage runs, i.e. one random pick per call.
		version = choice(user_agents)

	savedAs = 'file'
	RandomAgentOpener().retrieve(pageUrl, savedAs)
	return savedAs

def verifyPage(fileName):
	"""Extract the embedded player URL from a saved episode page.

	The file is scanned line by line for a line containing "unescape"
	(presumably a JavaScript ``unescape(...)`` call -- confirm against a real
	page). The text after that word is percent-decoded and the value following
	``src=`` is returned, cut at the first "&".

	Parameters:
		fileName -- path of the saved page to inspect.

	Returns:
		406      -- a line containing "Error 406" (and not "unescape") was met.
		"ERROR"  -- no "unescape" line was found, or its decoded text has no
		            ``src=`` part.
		str      -- the extracted URL otherwise.
	"""
	# unquote lives in urllib on Python 2 and in urllib.parse on Python 3.
	try:
		from urllib import unquote
	except ImportError:
		from urllib.parse import unquote

	# The context manager closes the file even when reading fails.
	with open(fileName, 'r') as page:
		lines = page.readlines()

	escaped = None
	for line in lines:
		if "unescape" in line:
			# No break here: the last matching line is the one that is used.
			escaped = line
		elif "Error 406" in line:
			print("ERROR 406 FOUND!!!")
			return 406
	if escaped is None:  # if for some reason it doesn't work, return an error!
		return "ERROR"

	# Keep what follows the first "unescape" and drop every parenthesis.
	payload = escaped.split("unescape")[1].replace("(", "").replace(")", "")

	pieces = unquote(payload).split("src=")
	if len(pieces) < 2:
		# Nothing to extract; report it the same way as a missing payload
		# instead of failing with an IndexError.
		return "ERROR"
	url = pieces[1].replace('"></iframe><br>', '').replace('"', '')
	return url.split("&")[0]


def getMp4(url):
	"""Download the video referenced by the player page at *url*.

	The player page is saved to the local file 'closer' and searched for
	``so.addVariable('file','...')``; the address found there is retrieved
	into '<num>.mp4', where ``num`` is the module-level episode number.

	Parameters:
		url -- address of the player page.

	Returns:
		400      -- a line of the player page contains "Bad Request".
		"ERROR"  -- no mp4 address was found on the player page.
		None     -- the video was retrieved.
	"""
	class MyOpener(FancyURLopener, object):
		# Drawn at random each time getMp4 runs.
		version = choice(user_agents)

	myopener = MyOpener()
	myopener.retrieve(url, 'closer')
	# The context manager closes the file even when reading fails.
	with open('closer', 'r') as page:
		lines = page.readlines()

	marker = "so.addVariable('file','"
	mp4 = None
	for idx, line in enumerate(lines):
		if "Bad Request" in line:
			print("ERROR: " + str(idx) + ' = ' + line)
			return 400
		# The previous test was `find(...) != 1`, which holds for every line
		# because find() signals "not found" with -1, not 1.
		if marker in line:
			# Only the text after the marker can hold the file value.
			for piece in line.split(marker)[1:]:
				if "mp4" in piece:
					# strip() drops the line ending that would otherwise
					# stay attached to the address.
					mp4 = piece.replace("');", "").strip()

	if mp4 is None:
		# Previously this case failed with an UnboundLocalError below.
		print("ERROR: no mp4 address found at " + str(url))
		return "ERROR"

	print("MP4 URL: " + str(mp4))
	name = str(num) + '.mp4'
	myopener.retrieve(mp4, name)
	print(name + " DONE!!")

def main():
	"""Download every episode from ``num`` through ``numEnd`` inclusive.

	For each episode the page is fetched and parsed; while the parser answers
	406 the fetch is retried with a growing pause (10s, 20s, ...), giving up
	once the retry counter exceeds 10. Episodes whose page could not be parsed
	are skipped, and only episodes for which getMp4 reports no error are
	counted as downloaded.

	Advances the module-level ``num`` as it goes (getMp4 names the output file
	after it).
	"""
	global num
	print("Downloading Naruto Episodes "+str(num)+" to "+str(numEnd))
	completedCount = 0
	while num <= numEnd:
		attempts = 0
		while True:
			url = verifyPage(getWebPage(num))
			if url != 406 or attempts > 10:
				break
			attempts += 1
			# if we can't get the webpage, wait and retry--we might be asking for too much too fast!
			time.sleep(attempts * 10)
		if url == 406 or url == "ERROR":
			# verifyPage's failure values are not URLs; handing them to getMp4
			# (as the old `url != ''` check did) cannot succeed.
			print("Skipping episode "+str(num)+": no video page URL found")
		elif getMp4(url) is None:
			# getMp4 returns None only when the download went through.
			completedCount += 1
		num += 1
	print("Finished! Downloaded "+str(completedCount)+" Episodes!")

# Script entry point: start downloading as soon as the file is executed.
main()  # go!
	#BELIEVE IT!!
	# Banner printed once, before anything else runs.
	print("Jesus Saves")

	#import libraries
	import sys;
	import time;
	import urllib;
	from urllib import FancyURLopener;
	from random import choice;

	# Episode range, taken from the command line: argv[1] is the first episode;
	# when it is the only argument the range is that single episode, otherwise
	# argv[2] is the last episode.
	num = int(sys.argv[1])
	numEnd = num if len(sys.argv) == 2 else int(sys.argv[2])

	# Pool of user agents; one is drawn at random for each opener so that the
	# server doesn't kick us out!
	user_agents = [
		'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
		'Opera/9.25 (Windows NT 5.1; U; en)',
		'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
		'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
		'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
		'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9',
	]

	def getWebPage(episode):
		"""Fetch the page for *episode* and save it to the local file 'file'.

		The opener class is defined inside the function, so its ``version``
		(the user agent string) is drawn from ``user_agents`` anew on every
		call.

		Returns the name of the file the page was written to.
		"""
		pageUrl = 'http://www.narutonine.com/NarutoEpisode' + str(episode) + 'EnglishDubbed.html'
		print("Naruto URL: " + pageUrl)

		class RandomAgentOpener(FancyURLopener, object):
			# Evaluated each time getWebPage runs, i.e. one random pick per call.
			version = choice(user_agents)

		savedAs = 'file'
		RandomAgentOpener().retrieve(pageUrl, savedAs)
		return savedAs

	def verifyPage(fileName):
		"""Extract the embedded player URL from a saved episode page.

		The file is scanned line by line for a line containing "unescape"
		(presumably a JavaScript ``unescape(...)`` call -- confirm against a
		real page). The text after that word is percent-decoded and the value
		following ``src=`` is returned, cut at the first "&".

		Parameters:
			fileName -- path of the saved page to inspect.

		Returns:
			406      -- a line containing "Error 406" (and not "unescape") was met.
			"ERROR"  -- no "unescape" line was found, or its decoded text has
			            no ``src=`` part.
			str      -- the extracted URL otherwise.
		"""
		# unquote lives in urllib on Python 2 and in urllib.parse on Python 3.
		try:
			from urllib import unquote
		except ImportError:
			from urllib.parse import unquote

		# The context manager closes the file even when reading fails.
		with open(fileName, 'r') as page:
			lines = page.readlines()

		escaped = None
		for line in lines:
			if "unescape" in line:
				# No break here: the last matching line is the one that is used.
				escaped = line
			elif "Error 406" in line:
				print("ERROR 406 FOUND!!!")
				return 406
		if escaped is None:  # if for some reason it doesn't work, return an error!
			return "ERROR"

		# Keep what follows the first "unescape" and drop every parenthesis.
		payload = escaped.split("unescape")[1].replace("(", "").replace(")", "")

		pieces = unquote(payload).split("src=")
		if len(pieces) < 2:
			# Nothing to extract; report it the same way as a missing payload
			# instead of failing with an IndexError.
			return "ERROR"
		url = pieces[1].replace('"></iframe><br>', '').replace('"', '')
		return url.split("&")[0]


	def getMp4(url):
		"""Download the video referenced by the player page at *url*.

		The player page is saved to the local file 'closer' and searched for
		``so.addVariable('file','...')``; the address found there is retrieved
		into '<num>.mp4', where ``num`` is the episode number defined outside
		this function.

		Parameters:
			url -- address of the player page.

		Returns:
			400      -- a line of the player page contains "Bad Request".
			"ERROR"  -- no mp4 address was found on the player page.
			None     -- the video was retrieved.
		"""
		class MyOpener(FancyURLopener, object):
			# Drawn at random each time getMp4 runs.
			version = choice(user_agents)

		myopener = MyOpener()
		myopener.retrieve(url, 'closer')
		# The context manager closes the file even when reading fails.
		with open('closer', 'r') as page:
			lines = page.readlines()

		marker = "so.addVariable('file','"
		mp4 = None
		for idx, line in enumerate(lines):
			if "Bad Request" in line:
				print("ERROR: " + str(idx) + ' = ' + line)
				return 400
			# The previous test was `find(...) != 1`, which holds for every
			# line because find() signals "not found" with -1, not 1.
			if marker in line:
				# Only the text after the marker can hold the file value.
				for piece in line.split(marker)[1:]:
					if "mp4" in piece:
						# strip() drops the line ending that would otherwise
						# stay attached to the address.
						mp4 = piece.replace("');", "").strip()

		if mp4 is None:
			# Previously this case failed with an UnboundLocalError below.
			print("ERROR: no mp4 address found at " + str(url))
			return "ERROR"

		print("MP4 URL: " + str(mp4))
		name = str(num) + '.mp4'
		myopener.retrieve(mp4, name)
		print(name + " DONE!!")

	def main():
		"""Download every episode from ``num`` through ``numEnd`` inclusive.

		For each episode the page is fetched and parsed; while the parser
		answers 406 the fetch is retried with a growing pause (10s, 20s, ...),
		giving up once the retry counter exceeds 10. Episodes whose page could
		not be parsed are skipped, and only episodes for which getMp4 reports
		no error are counted as downloaded.

		Advances the global ``num`` as it goes (getMp4 names the output file
		after it).
		"""
		global num
		print("Downloading Naruto Episodes "+str(num)+" to "+str(numEnd))
		completedCount = 0
		while num <= numEnd:
			attempts = 0
			while True:
				url = verifyPage(getWebPage(num))
				if url != 406 or attempts > 10:
					break
				attempts += 1
				# if we can't get the webpage, wait and retry--we might be asking for too much too fast!
				time.sleep(attempts * 10)
			if url == 406 or url == "ERROR":
				# verifyPage's failure values are not URLs; handing them to
				# getMp4 (as the old `url != ''` check did) cannot succeed.
				print("Skipping episode "+str(num)+": no video page URL found")
			elif getMp4(url) is None:
				# getMp4 returns None only when the download went through.
				completedCount += 1
			num += 1
		print("Finished! Downloaded "+str(completedCount)+" Episodes!")

	# Script entry point: start downloading as soon as the file is executed.
	main()  # go!