# Kinjalrk2k/shattak_downloader.py

## shattak_downloader.py
# Scrape a shattak.com "quordenet" subject page and download every PDF that
# is linked from its "./download..." sub-pages into the local "files" folder.
#
# The script runs top to bottom when executed; it prints each PDF URL as it
# is downloaded, then the list of visited sub-pages and the raw PDF hrefs.
import os
import re
import urllib
import urllib.request  # `import urllib` alone does not load this submodule
from urllib.parse import urljoin

from bs4 import BeautifulSoup
import wget

header = 'https://www.shattak.com/quordenet/'
subject_url = header + 'subject?code=APT-101&name=tapas-sir'
download_dir = 'files'


def _collect_hrefs(url):
    """Return the ``href`` value of every ``<a href=...>`` tag found at *url*.

    The page is fetched with ``urllib.request.urlopen`` and the HTTP response
    is closed as soon as the document has been parsed.
    """
    with urllib.request.urlopen(url) as response:
        # Name the parser explicitly so the outcome does not depend on which
        # optional parsers happen to be installed on the machine.
        page = BeautifulSoup(response, 'html.parser')
    return [anchor['href'] for anchor in page.find_all('a', href=True)]


# Keep only the anchors whose href starts with "./download"; each one is
# resolved against `header` so the resulting URL has no doubled slash.
links = [
    urljoin(header, href)
    for href in _collect_hrefs(subject_url)
    if href.startswith('./download')
]

# The target folder is created up front rather than relying on
# wget.download() to create it.
os.makedirs(download_dir, exist_ok=True)

pdf_links = []
for page_url in links:
    for href in _collect_hrefs(page_url):
        if not href.endswith('.pdf'):
            continue

        pdf_links.append(href)  # the raw href, exactly as found in the page
        ded_link = urljoin(header, href)
        print(ded_link)

        # The last path segment of the URL becomes the local file name.
        fname = ded_link.split('/')[-1]
        wget.download(ded_link, os.path.join(download_dir, fname))
        print()  # end the line after wget's progress output

print(links)
print(pdf_links)