# arunkarnann/urlgrabber.py

## urlgrabber.py
from bs4 import BeautifulSoup
import os
try:
    import urllib.request as urllib2
except ImportError:
    import urllib2
import sys


# Source HTML page whose links are scanned for files to download.
source_url = "http://www.mayuren.org/site/mayurengorg/1Tamil/Audio%20Books%20-%20Tamil%20Collection/ponniyin%20selvan/ponni%201"
# Text file that receives one download URL per line so the list can be fed
# to the aria2c downloader. The folder must already exist; the file itself
# is created (or emptied) when it is opened for writing below.
aria2c_filepath = "C:/Users/User2/Desktop/aria2-1.28.0-win-64bit-build1/urls.txt"


# Fetch and parse the page. The response is closed explicitly (try/finally
# rather than `with`, which the Python 2 urllib2 fallback does not support).
html_page = urllib2.urlopen(source_url)
try:
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed alongside bs4.
    soup = BeautifulSoup(html_page, "html.parser")
finally:
    html_page.close()

# Debugging aid used on the first run to inspect every link on the page
# and work out the matching rule applied below:
#
#     for all_links in soup.findAll('a'):
#         print(all_links.get('href'))

# Open the output once in write mode: this discards any previous contents
# and avoids re-opening the file for every matching link.
with open(aria2c_filepath, 'w') as f:
    for link in soup.findAll('a'):
        temp_reader = link.get('href')
        # Skip anchors without an href and links that are not MP3 downloads.
        if temp_reader is None or ".mp3?l=12" not in temp_reader:
            continue
        # hrefs are appended to the site root; the "?l=12" query suffix is
        # cut off so the URL ends at the .mp3 file name.
        url = ("http://www.mayuren.org" + temp_reader).split("?l=12")[0]
        # f.write instead of print(..., file=f): the latter is a syntax
        # error on Python 2, which would defeat the urllib2 import fallback.
        f.write(url + "\n")
	# NOTE(review): this section repeats the script that precedes it in the
	# file, with its indentation restored; confirm whether the duplicate
	# should be kept at all.
	from bs4 import BeautifulSoup
	import os
	try:
		import urllib.request as urllib2
	except ImportError:
		import urllib2
	import sys


	# Source HTML page whose links are scanned for files to download.
	source_url = "http://www.mayuren.org/site/mayurengorg/1Tamil/Audio%20Books%20-%20Tamil%20Collection/ponniyin%20selvan/ponni%201"
	# Text file that receives one download URL per line so the list can be fed
	# to the aria2c downloader. The folder must already exist; the file itself
	# is created (or emptied) when it is opened for writing below.
	aria2c_filepath = "C:/Users/User2/Desktop/aria2-1.28.0-win-64bit-build1/urls.txt"



	# Fetch and parse the page. The response is closed explicitly (try/finally
	# rather than `with`, which the Python 2 urllib2 fallback does not support).
	html_page = urllib2.urlopen(source_url)
	try:
		# Name the parser explicitly so the result does not depend on which
		# optional parsers happen to be installed alongside bs4.
		soup = BeautifulSoup(html_page, "html.parser")
	finally:
		html_page.close()

	# Debugging aid used on the first run to inspect every link on the page
	# and work out the matching rule applied below:
	#
	#     for all_links in soup.findAll('a'):
	#         print(all_links.get('href'))

	# Open the output once in write mode: this discards any previous contents
	# and avoids re-opening the file for every matching link.
	with open(aria2c_filepath, 'w') as f:
		for link in soup.findAll('a'):
			temp_reader = link.get('href')
			# Skip anchors without an href and links that are not MP3 downloads.
			if temp_reader is None or ".mp3?l=12" not in temp_reader:
				continue
			# hrefs are appended to the site root; the "?l=12" query suffix is
			# cut off so the URL ends at the .mp3 file name.
			url = ("http://www.mayuren.org" + temp_reader).split("?l=12")[0]
			# f.write instead of print(..., file=f): the latter is a syntax
			# error on Python 2, which would defeat the urllib2 import fallback.
			f.write(url + "\n")