Skip to content

Instantly share code, notes, and snippets.

@arunkarnann
Created November 9, 2016 05:39
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save arunkarnann/b4c02523644905d2d252a808864445a0 to your computer and use it in GitHub Desktop.
Python script to grab .mp3 URLs from an HTML page / web page
"""Collect the .mp3 links found on one HTML page into a URL list file.

The page at ``source_url`` is downloaded and every ``<a href>`` containing
``.mp3?l=12`` is turned into a URL (site prefix prepended, the ``?l=12``
suffix dropped) and written, one per line, to ``aria2c_filepath``.  Per the
original author's note, that file is meant to be handed to the aria2c
downloader.
"""
from bs4 import BeautifulSoup
import os
try:
    # Python 3 location of urlopen, aliased so the rest of the script can use
    # the Python 2 module name.
    import urllib.request as urllib2
except ImportError:
    import urllib2
import sys

# Source HTML page from which the links are collected.
source_url = "http://www.mayuren.org/site/mayurengorg/1Tamil/Audio%20Books%20-%20Tamil%20Collection/ponniyin%20selvan/ponni%201"
# Text file that receives the URLs for the aria2c downloader.  Remember to
# keep it in the aria2c folder and create the file urls.txt there.
aria2c_filepath = "C:/Users/User2/Desktop/aria2-1.28.0-win-64bit-build1/urls.txt"

# Fetch and parse the page.  The response is closed explicitly because the
# Python 2 urlopen() result cannot be used as a context manager.
html_page = urllib2.urlopen(source_url)
try:
    # Name the parser so the result does not depend on which optional parsers
    # happen to be installed (and to avoid bs4's "no parser specified" warning).
    soup = BeautifulSoup(html_page, "html.parser")
finally:
    html_page.close()

# Tip: on a first run against a new page, print link.get('href') for every
# anchor to work out the matching rule used below.

# Opening with 'w' discards any previous contents; the file is opened once
# for the whole loop instead of being reopened in append mode per link.
with open(aria2c_filepath, 'w') as f:
    for link in soup.find_all('a'):
        temp_reader = link.get('href')
        # Skip anchors without an href and links that are not the audio files.
        if temp_reader is None or ".mp3?l=12" not in temp_reader:
            continue
        # Prepend the site prefix and drop the "?l=12" query suffix.
        url = ("http://www.mayuren.org" + temp_reader).split("?l=12")[0]
        # f.write() instead of print(..., file=f): the latter is a syntax
        # error on Python 2, which the import fallback above intends to support.
        f.write(url + "\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment